In this project, our goal was to predict stock prices using a machine learning approach. To achieve this, we designed and implemented a model based on a set of carefully chosen features. These features included technical indicators such as the Relative Strength Index (RSI), Money Flow Index (MFI), Exponential Moving Averages (EMA), Simple Moving Average (SMA), and Moving Average Convergence Divergence (MACD), as well as historical price data encompassing the previous 1 day, 3 days, 5 days, and 1, 2, 3, and 4 weeks. Additionally, rolling average values for high, low, open, close, adjusted close, and volume were incorporated.
import os
import time
import numpy as np
import pandas as pd
import xgboost as xgb
import matplotlib.pyplot as plt
import seaborn as sns
from xgboost import plot_importance, plot_tree
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingRegressor, AdaBoostRegressor, RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
# Show every column when displaying DataFrames (the feature set below is wide).
pd.set_option('display.max_columns', None)
# Chart drawing
import plotly as py
import plotly.io as pio
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Mute sklearn warnings
from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)
simplefilter(action='ignore', category=DeprecationWarning)
# Show charts when running kernel
#init_notebook_mode(connected=True)
# Change default background color for all visualizations:
# build a figure with a transparent paper and a light plot area, register it
# as a Plotly template, and make that template the default for every figure.
layout=go.Layout(paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(250,250,250,0.8)')
fig = go.Figure(layout=layout)
templated_fig = pio.to_templated(fig)
pio.templates['my_template'] = templated_fig.layout.template
pio.templates.default = 'my_template'
import warnings
# Silence xgboost UserWarnings, then (more broadly) all remaining warnings.
warnings.filterwarnings("ignore", category=UserWarning, module="xgboost")
warnings.filterwarnings("ignore")
def evaluate_regression_model(y_true, y_pred):
    """
    Calculate, print, and return evaluation metrics for a regression model.

    Parameters:
    - y_true: Actual values.
    - y_pred: Predicted values.

    Returns:
    - Dictionary containing the evaluation metrics
      ('MSE', 'RMSE', 'MAE', 'R2').
    """
    # Calculate evaluation metrics.
    mse = mean_squared_error(y_true, y_pred)
    # Derive RMSE from MSE directly: `squared=False` was deprecated in
    # scikit-learn 1.4 and removed in 1.6, and sqrt also avoids computing
    # the metric a second time.
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    # Print the results rounded to three decimals.
    print(f'Mean Squared Error (MSE): {np.round(mse,3)}')
    print(f'Root Mean Squared Error (RMSE): {np.round(rmse,3)}')
    print(f'Mean Absolute Error (MAE): {np.round(mae,3)}')
    print(f'R-squared (R2): {np.round(r2,3)}')
    # Return results as a dictionary.
    results = {
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'R2': r2
    }
    return results
def evaluate_regression_model2(y_true, y_pred):
    """
    Calculate and return evaluation metrics for a regression model.

    Silent counterpart of `evaluate_regression_model`: identical metrics,
    but nothing is printed (useful inside model-selection loops).

    Parameters:
    - y_true: Actual values.
    - y_pred: Predicted values.

    Returns:
    - Dictionary containing the evaluation metrics
      ('MSE', 'RMSE', 'MAE', 'R2').
    """
    # Calculate evaluation metrics.
    mse = mean_squared_error(y_true, y_pred)
    # `squared=False` is deprecated (removed in scikit-learn 1.6); take the
    # square root of the already-computed MSE instead.
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    # Return results as a dictionary.
    results = {
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'R2': r2
    }
    return results
# Relative Strength Index helper
def rsi(df, periods = 14):
    """
    Compute the Relative Strength Index (RSI) from the 'close' column.

    Parameters:
    - df (DataFrame): Pandas DataFrame with a 'close' column.
    - periods (int): Look-back length for the RSI. Default is 14.

    Returns:
    - Series: RSI values in the range [0, 100] (NaN during warm-up).
    """
    delta = df['close'].diff()
    # Split the one-day changes into gains and (positive-valued) losses.
    gains = delta.clip(lower=0)
    losses = -delta.clip(upper=0)
    # Exponentially weighted averages of gains and losses (Wilder-style
    # smoothing via com=periods-1).
    avg_gain = gains.ewm(com=periods - 1, adjust=True, min_periods=periods).mean()
    avg_loss = losses.ewm(com=periods - 1, adjust=True, min_periods=periods).mean()
    relative_strength = avg_gain / avg_loss
    return 100 - (100 / (1 + relative_strength))
def gain(x):
    """Sum of the positive entries of *x* (negative entries contribute 0)."""
    positive_part = (x > 0) * x
    return positive_part.sum()
def loss(x):
    """Sum of the negative entries of *x* (returned as a non-positive value)."""
    negative_part = (x < 0) * x
    return negative_part.sum()
def mfi(df, n=14):
    """
    Compute the Money Flow Index (MFI) over an `n`-period window.

    Parameters:
    - df (DataFrame): Pandas DataFrame with 'high', 'low', 'close', and
      'volume' columns.
    - n (int): Window length for the money-flow sums. Default is 14.

    Returns:
    - numpy.ndarray: MFI values in the range [0, 100].
    """
    # Typical price and the raw money flow it generates.
    tp = (df['high'] + df['low'] + df['close']) / 3
    raw_flow = tp * df['volume']
    # Sign each day by whether the typical price rose versus the prior day
    # (the first day, compared against NaN, gets -1).
    direction = np.where(tp > tp.shift(1), 1, -1)
    directed_flow = raw_flow * direction
    # Positive and negative money flows, summed over the rolling window.
    inflow = directed_flow.clip(lower=0)
    outflow = (-directed_flow).clip(lower=0)
    gain_sum = inflow.rolling(n, min_periods=1).sum()
    loss_sum = outflow.rolling(n, min_periods=1).sum()
    ratio = gain_sum / loss_sum
    return (100 - 100 / (1 + ratio)).to_numpy()
def plot_regression_accuracy(y_true, y_pred):
    """
    Create various plots to evaluate the accuracy of a regression model.

    Shows four figures in sequence: an actual-vs-predicted scatter, a
    residual plot, a histogram of the residuals, and a predicted-vs-actual
    scatter with a perfect-fit reference line.

    Parameters:
    - y_true: Actual values.
    - y_pred: Predicted values.
    """
    # Scatter Plot: points on the diagonal indicate perfect predictions.
    plt.scatter(y_true, y_pred)
    plt.xlabel('Actual Values')
    plt.ylabel('Predicted Values')
    plt.title('Scatter Plot of Actual vs Predicted Values')
    plt.show()
    # Residual Plot: structure around the zero line suggests systematic bias.
    residuals = y_true - y_pred
    plt.scatter(y_pred, residuals)
    plt.axhline(y=0, color='r', linestyle='--')
    plt.xlabel('Predicted Values')
    plt.ylabel('Residuals')
    plt.title('Residual Plot')
    plt.show()
    # Distribution of Residuals: ideally roughly symmetric around 0.
    sns.histplot(residuals, kde=True)
    plt.xlabel('Residuals')
    plt.ylabel('Frequency')
    plt.title('Distribution of Residuals')
    plt.show()
    # Predicted vs Actual with the y = x "perfect fit" line for reference.
    plt.plot(y_true, y_true, linestyle='--', color='r', label='Perfect Fit')
    plt.scatter(y_true, y_pred)
    plt.xlabel('Actual Values')
    plt.ylabel('Predicted Values')
    plt.title('Predicted vs Actual Values with Perfect Fit Line')
    plt.legend()
    plt.show()
def plot_predictions(df, prediction):
    """
    Create a Plotly graph to compare actual values with predictions.

    Top subplot: the full truth series with the test-period predictions
    overlaid. Bottom subplot: truth vs. prediction on the test period only.

    Parameters:
    - df (DataFrame): A pandas DataFrame containing 'date' and
      'close_1d_next' columns.
    - prediction (array-like): Predicted values for the test set
      (rows with date year >= 2020), in row order.
    """
    # Take an explicit copy so adding the 'prediction' column does not raise
    # a SettingWithCopyWarning on a view of `df`.
    plot_test_df = df[df.date.dt.year >= 2020].copy()
    plot_test_df['prediction'] = prediction
    fig = make_subplots(rows=2, cols=1)
    fig.add_trace(go.Scatter(x=df.date, y=df.close_1d_next,
                             name='Truth',
                             marker_color='LightSkyBlue'), row=1, col=1)
    fig.add_trace(go.Scatter(x=plot_test_df.date,
                             y=plot_test_df.prediction,
                             name='Prediction',
                             marker_color='MediumPurple'), row=1, col=1)
    # Add title and Y-axis title for the first subplot.
    fig.update_layout(title_text='Train Data and Test Data', title_x=0.5, title_y=0.9)
    fig.update_yaxes(title_text='Prediction', row=1, col=1)
    # Bug fix: the original referenced an undefined global `y_test` here;
    # use the truth column of the test slice so the function is self-contained.
    fig.add_trace(go.Scatter(x=plot_test_df.date,
                             y=plot_test_df.close_1d_next,
                             name='Truth',
                             marker_color='LightSkyBlue',
                             showlegend=False), row=2, col=1)
    fig.add_trace(go.Scatter(x=plot_test_df.date,
                             y=prediction,
                             name='Prediction',
                             marker_color='MediumPurple',
                             showlegend=False), row=2, col=1)
    fig.update_yaxes(title_text='Prediction', row=2, col=1)
    fig.show()
def plot_feature_importance(model, X_train, top_features):
    """
    Plot the largest-magnitude coefficients of a linear model and return a
    sorted DataFrame of feature importances.

    Parameters:
    - model: A trained linear regression model with a coef_ attribute.
    - X_train (DataFrame): The DataFrame used to train the model, for
      feature names.
    - top_features (int): Number of top features to display.

    Returns:
    - DataFrame: All features with their |coefficient| importance, sorted
      in descending order.
    """
    # Build the importance table from the absolute coefficient magnitudes
    # and sort it once, most important first.
    feature_importance_df = pd.DataFrame({
        'Feature': X_train.columns,
        'Importance': np.abs(model.coef_),
    }).sort_values(by='Importance', ascending=False).reset_index(drop=True)
    # Slice the top rows once instead of re-slicing for every plot call.
    top_df = feature_importance_df[:top_features]
    # Plot feature importance as a horizontal bar chart.
    plt.figure(figsize=(20, 6))
    plt.barh(range(len(top_df)), top_df['Importance'], align="center")
    plt.yticks(range(len(top_df)), labels=top_df['Feature'])
    plt.ylabel("Features")
    plt.xlabel("Coefficient Magnitude")
    plt.title(f"Top {top_features} Feature Importance Values")
    plt.show()
    return feature_importance_df
# Location of the pre-built dataset on disk.
out_loc = '/Users/isapocan/Desktop/LSU/data/'
# Define the file path for the parquet file
parquet_file_path = out_loc + "stock_1d.parquet"
try:
    # Read the Parquet file into a DataFrame
    df = pd.read_parquet(parquet_file_path)
    # Convert column names to lowercase for consistency
    df.columns = df.columns.str.lower()
    # Display the first few rows of the DataFrame
    # NOTE: `display` is the IPython/Jupyter helper; this is notebook code.
    display(df.head())
except Exception as e:
    # Broad catch is deliberate for an interactive load: report and continue.
    print(f"An error occurred while reading the file: {e}")
| date | open | high | low | close | adj close | volume | symbol | security | gics sector | gics sub-industry | headquarters location | date added | cik | founded | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2013-01-02 | 94.190002 | 94.790001 | 93.959999 | 94.779999 | 67.895119 | 3206700.0 | MMM | 3M | Industrials | Industrial Conglomerates | Saint Paul, Minnesota | 1957-03-04 | 66740 | 1902 |
| 1 | 2013-01-03 | 94.339996 | 94.930000 | 94.129997 | 94.669998 | 67.816322 | 2704600.0 | MMM | 3M | Industrials | Industrial Conglomerates | Saint Paul, Minnesota | 1957-03-04 | 66740 | 1902 |
| 2 | 2013-01-04 | 94.790001 | 95.480003 | 94.540001 | 95.370003 | 68.317757 | 2704900.0 | MMM | 3M | Industrials | Industrial Conglomerates | Saint Paul, Minnesota | 1957-03-04 | 66740 | 1902 |
| 3 | 2013-01-07 | 95.019997 | 95.730003 | 94.760002 | 95.489998 | 68.403717 | 2745800.0 | MMM | 3M | Industrials | Industrial Conglomerates | Saint Paul, Minnesota | 1957-03-04 | 66740 | 1902 |
| 4 | 2013-01-08 | 95.169998 | 95.750000 | 95.099998 | 95.500000 | 68.410889 | 2655500.0 | MMM | 3M | Industrials | Industrial Conglomerates | Saint Paul, Minnesota | 1957-03-04 | 66740 | 1902 |
# Filter the DataFrame to include only rows where 'symbol' is 'MDLZ'
# (Mondelez International) — the rest of the notebook models this one stock.
df = df[df['symbol']=='MDLZ']
# Display the first few rows and the shape of the filtered DataFrame
display(df.head())
display(df.shape)
| date | open | high | low | close | adj close | volume | symbol | security | gics sector | gics sub-industry | headquarters location | date added | cik | founded | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 852843 | 2013-01-02 | 25.840000 | 26.690001 | 25.780001 | 26.670000 | 21.445908 | 17862400.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 |
| 852844 | 2013-01-03 | 26.700001 | 26.770000 | 26.490000 | 26.639999 | 21.421791 | 9075500.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 |
| 852845 | 2013-01-04 | 26.700001 | 26.830000 | 26.549999 | 26.740000 | 21.502203 | 7696000.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 |
| 852846 | 2013-01-07 | 26.620001 | 26.740000 | 26.549999 | 26.660000 | 21.437866 | 7576200.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 |
| 852847 | 2013-01-08 | 26.520000 | 26.920000 | 26.459999 | 26.680000 | 21.453959 | 14360800.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 |
(2733, 15)
Description: RSI helps you understand if a stock is likely to be overbought (prices too high) or oversold (prices too low). It looks at recent price changes to make this determination.
Description: MFI considers both price and trading volume to identify if a stock is overbought or oversold. It helps gauge the strength of buying and selling pressure.
Description: EMA smoothens out price data, giving more weight to recent prices. It reacts faster to price changes compared to a Simple Moving Average (SMA), making it useful for trend analysis.
Description: SMA is a basic average of stock prices over a specific period. It provides a smoothed representation of the overall price trend, helping to identify general market direction.
Description: MACD is a trend-following momentum indicator that shows the relationship between two moving averages of a security's price. It helps identify potential trend reversals or momentum shifts.
Description: The MACD signal line is a nine-day EMA of the MACD. It is used to generate trading signals. When the MACD crosses above the signal line, it might be a signal to buy, and when it crosses below, it might be a signal to sell.
def add_moving_averages(df, column_name):
    """
    Add moving-average feature columns to the DataFrame in-place.

    Creates 'ema_9' (9-period exponential moving average) and
    'sma_5' / 'sma_10' / 'sma_15' / 'sma_30' (simple moving averages).
    Every column is shifted one row forward so each row only sees
    information available before that row.

    Parameters:
    - df (DataFrame): The DataFrame to modify.
    - column_name (str): The column to compute moving averages for.
    """
    series = df[column_name]
    # Exponential Moving Average (EMA), shifted by one row.
    df['ema_9'] = series.ewm(span=9).mean().shift()
    # Simple Moving Averages (SMA) over several window lengths.
    for window in (5, 10, 15, 30):
        df[f'sma_{window}'] = series.rolling(window=window).mean().shift()
# Add moving averages for the 'close' column
# (creates ema_9, sma_5, sma_10, sma_15 and sma_30 columns in-place).
add_moving_averages(df, 'close')
df.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 2733 entries, 852843 to 855575 Data columns (total 20 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 date 2733 non-null datetime64[ns] 1 open 2733 non-null float64 2 high 2733 non-null float64 3 low 2733 non-null float64 4 close 2733 non-null float64 5 adj close 2733 non-null float64 6 volume 2733 non-null float64 7 symbol 2733 non-null object 8 security 2733 non-null object 9 gics sector 2733 non-null object 10 gics sub-industry 2733 non-null object 11 headquarters location 2733 non-null object 12 date added 2733 non-null object 13 cik 2733 non-null int64 14 founded 2733 non-null object 15 ema_9 2732 non-null float64 16 sma_5 2728 non-null float64 17 sma_10 2723 non-null float64 18 sma_15 2718 non-null float64 19 sma_30 2703 non-null float64 dtypes: datetime64[ns](1), float64(11), int64(1), object(7) memory usage: 448.4+ KB
# Duplicate notebook cell: repeats the df.info() call above; output unchanged.
df.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 2733 entries, 852843 to 855575 Data columns (total 20 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 date 2733 non-null datetime64[ns] 1 open 2733 non-null float64 2 high 2733 non-null float64 3 low 2733 non-null float64 4 close 2733 non-null float64 5 adj close 2733 non-null float64 6 volume 2733 non-null float64 7 symbol 2733 non-null object 8 security 2733 non-null object 9 gics sector 2733 non-null object 10 gics sub-industry 2733 non-null object 11 headquarters location 2733 non-null object 12 date added 2733 non-null object 13 cik 2733 non-null int64 14 founded 2733 non-null object 15 ema_9 2732 non-null float64 16 sma_5 2728 non-null float64 17 sma_10 2723 non-null float64 18 sma_15 2718 non-null float64 19 sma_30 2703 non-null float64 dtypes: datetime64[ns](1), float64(11), int64(1), object(7) memory usage: 448.4+ KB
# Add a Relative Strength Index (RSI) column to the DataFrame
try:
    df['rsi'] = rsi(df) # Uncomment and adjust fillna(0) if appropriate for handling missing values
except Exception as e:
    print(f"Error calculating RSI: {e}")
# Add a Money Flow Index (MFI) column to the DataFrame
try:
    df['mfi'] = mfi(df, 14) # The second argument is the period, here assumed to be 14
except Exception as e:
    print(f"Error calculating MFI: {e}")
# Inspect the new indicator columns next to the raw close price.
df[['date','close','ema_9','sma_5','sma_10','sma_15','sma_30','rsi','mfi']]
| date | close | ema_9 | sma_5 | sma_10 | sma_15 | sma_30 | rsi | mfi | |
|---|---|---|---|---|---|---|---|---|---|
| 852843 | 2013-01-02 | 26.670000 | NaN | NaN | NaN | NaN | NaN | NaN | 0.000000 |
| 852844 | 2013-01-03 | 26.639999 | 26.670000 | NaN | NaN | NaN | NaN | NaN | 33.904295 |
| 852845 | 2013-01-04 | 26.740000 | 26.653333 | NaN | NaN | NaN | NaN | NaN | 48.695375 |
| 852846 | 2013-01-07 | 26.660000 | 26.688852 | NaN | NaN | NaN | NaN | NaN | 39.919745 |
| 852847 | 2013-01-08 | 26.680000 | 26.679078 | NaN | NaN | NaN | NaN | NaN | 55.233142 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 855571 | 2023-11-02 | 67.970001 | 65.583879 | 65.934001 | 65.321001 | 64.406001 | 65.993000 | 60.257764 | 89.207420 |
| 855572 | 2023-11-03 | 68.820000 | 66.061103 | 66.398001 | 65.697001 | 64.868001 | 65.901667 | 63.726091 | 89.458580 |
| 855573 | 2023-11-06 | 68.239998 | 66.612883 | 67.160001 | 66.169001 | 65.354001 | 65.848000 | 59.885606 | 83.710782 |
| 855574 | 2023-11-07 | 68.489998 | 66.938306 | 67.612000 | 66.594001 | 65.728667 | 65.799000 | 60.977252 | 75.937617 |
| 855575 | 2023-11-08 | 69.019997 | 67.248644 | 68.067999 | 66.888000 | 66.058667 | 65.729667 | 63.259914 | 75.566164 |
2733 rows × 9 columns
# Calculate and display the correlation between 'rsi' and 'mfi'
# (both measure overbought/oversold pressure, so some correlation is expected).
if {'rsi', 'mfi'}.issubset(df.columns):
    correlation = df[['rsi', 'mfi']].corr()
    print(correlation)
else:
    print("DataFrame does not contain 'rsi' and/or 'mfi' columns.")
rsi mfi rsi 1.000000 0.698958 mfi 0.698958 1.000000
# Calculate the Moving Average Convergence Divergence (MACD) and its signal line.
# Ensure the 'close' column exists in the DataFrame
if 'close' in df.columns:
    # MACD = 12-period EMA minus 26-period EMA of the closing prices
    # (min_periods leaves NaNs until enough history has accumulated).
    df['macd'] = df['close'].ewm(span=12, min_periods=12).mean() - df['close'].ewm(span=26, min_periods=26).mean()
    # Signal line = 9-period EMA of the MACD values.
    df['macd_signal'] = df['macd'].ewm(span=9, min_periods=9).mean()
else:
    print("DataFrame does not contain 'close' column.")
# Check if the required columns exist in the DataFrame
if {'macd', 'macd_signal'}.issubset(df.columns):
    # Select rows where 'macd' and 'macd_signal' columns do not have missing
    # values (the min_periods warm-up above leaves NaNs at the start).
    filtered_df = df[(~df['macd'].isna()) & (~df['macd_signal'].isna())]
    # Display the first few rows of these columns
    print(filtered_df[['macd', 'macd_signal']].head())
else:
    print("DataFrame does not contain 'macd' and/or 'macd_signal' columns.")
macd macd_signal 852876 -0.147786 -0.050945 852877 -0.175230 -0.078792 852878 -0.198438 -0.104970 852879 -0.235462 -0.132994 852880 -0.226841 -0.152855
# Check if the required columns exist in the DataFrame
if {'date', 'close'}.issubset(df.columns):
    # Create the prediction target: the next trading day's closing price
    # (shift(-1) pulls each row's following close onto the current row).
    df['close_1d_next'] = df['close'].shift(-1)
    # Display the first few rows including 'date', 'close', and 'close_1d_next'
    print(df[['date', 'close', 'close_1d_next']].head())
else:
    print("DataFrame does not contain 'date' and/or 'close' columns.")
date close close_1d_next 852843 2013-01-02 26.670000 26.639999 852844 2013-01-03 26.639999 26.740000 852845 2013-01-04 26.740000 26.660000 852846 2013-01-07 26.660000 26.680000 852847 2013-01-08 26.680000 27.049999
def add_lagged_features(df, column_name, lags):
    """
    Add lagged copies of a column to the DataFrame in-place.

    For each lag L a new column '<column_name>_<L>d_ago' is created,
    holding the value of `column_name` from L rows earlier.

    Parameters:
    - df (DataFrame): The DataFrame to modify.
    - column_name (str): The column to create lagged features for.
    - lags (list of int): The lag periods, in rows.
    """
    base = df[column_name]
    for lag in lags:
        df[f'{column_name}_{lag}d_ago'] = base.shift(lag)
def add_rolling_avg_features(df, column_name, windows):
    """
    Add rolling-average columns for one column to the DataFrame in-place.

    For each window W a new column '<column_name>_<W>d_avg' is created,
    holding the trailing W-row mean of `column_name` (NaN until W rows
    of history exist). Unlike add_moving_averages, no shift is applied.

    Parameters:
    - df (DataFrame): The DataFrame to modify.
    - column_name (str): The column to create rolling averages for.
    - windows (list of int): The rolling window sizes, in rows.
    """
    base = df[column_name]
    for window in windows:
        df[f'{column_name}_{window}d_avg'] = base.rolling(window=window).mean()
# Define lag periods (1/3/5/7 days plus 1-4 weeks) and rolling window sizes.
lag_periods = [1, 3, 5, 7, 14, 21, 28]
rolling_windows = [3, 5, 7, 10, 15, 30]
# Columns to create features for
columns = ['close', 'adj close', 'open', 'high', 'low', 'volume']
# Add lagged and rolling average features for each column
# (6 columns x (7 lags + 6 windows) = 78 new feature columns).
for column in columns:
    add_lagged_features(df, column, lag_periods)
    add_rolling_avg_features(df, column, rolling_windows)
# View the DataFrame
df.head()
| date | open | high | low | close | adj close | volume | symbol | security | gics sector | gics sub-industry | headquarters location | date added | cik | founded | ema_9 | sma_5 | sma_10 | sma_15 | sma_30 | rsi | mfi | macd | macd_signal | close_1d_next | close_1d_ago | close_3d_ago | close_5d_ago | close_7d_ago | close_14d_ago | close_21d_ago | close_28d_ago | close_3d_avg | close_5d_avg | close_7d_avg | close_10d_avg | close_15d_avg | close_30d_avg | adj close_1d_ago | adj close_3d_ago | adj close_5d_ago | adj close_7d_ago | adj close_14d_ago | adj close_21d_ago | adj close_28d_ago | adj close_3d_avg | adj close_5d_avg | adj close_7d_avg | adj close_10d_avg | adj close_15d_avg | adj close_30d_avg | open_1d_ago | open_3d_ago | open_5d_ago | open_7d_ago | open_14d_ago | open_21d_ago | open_28d_ago | open_3d_avg | open_5d_avg | open_7d_avg | open_10d_avg | open_15d_avg | open_30d_avg | high_1d_ago | high_3d_ago | high_5d_ago | high_7d_ago | high_14d_ago | high_21d_ago | high_28d_ago | high_3d_avg | high_5d_avg | high_7d_avg | high_10d_avg | high_15d_avg | high_30d_avg | low_1d_ago | low_3d_ago | low_5d_ago | low_7d_ago | low_14d_ago | low_21d_ago | low_28d_ago | low_3d_avg | low_5d_avg | low_7d_avg | low_10d_avg | low_15d_avg | low_30d_avg | volume_1d_ago | volume_3d_ago | volume_5d_ago | volume_7d_ago | volume_14d_ago | volume_21d_ago | volume_28d_ago | volume_3d_avg | volume_5d_avg | volume_7d_avg | volume_10d_avg | volume_15d_avg | volume_30d_avg | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 852843 | 2013-01-02 | 25.840000 | 26.690001 | 25.780001 | 26.670000 | 21.445908 | 17862400.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 | NaN | NaN | NaN | NaN | NaN | NaN | 0.000000 | NaN | NaN | 26.639999 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 852844 | 2013-01-03 | 26.700001 | 26.770000 | 26.490000 | 26.639999 | 21.421791 | 9075500.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 | 26.670000 | NaN | NaN | NaN | NaN | NaN | 33.904295 | NaN | NaN | 26.740000 | 26.670000 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 21.445908 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 25.840000 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 26.690001 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 25.780001 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 17862400.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 852845 | 2013-01-04 | 26.700001 | 26.830000 | 26.549999 | 26.740000 | 21.502203 | 7696000.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 | 26.653333 | NaN | NaN | NaN | NaN | NaN | 48.695375 | NaN | NaN | 26.660000 | 26.639999 | NaN | NaN | NaN | NaN | NaN | NaN | 26.683333 | NaN | NaN | NaN | NaN | NaN | 21.421791 | NaN | NaN | NaN | NaN | NaN | NaN | 21.456634 | NaN | NaN | NaN | NaN | NaN | 26.700001 | NaN | NaN | NaN | NaN | NaN | NaN | 26.413334 | NaN | NaN | NaN | NaN | NaN | 26.770000 | NaN | NaN | NaN | NaN | NaN | NaN | 26.763334 | NaN | NaN | NaN | NaN | NaN | 26.490000 | NaN | NaN | NaN | NaN | NaN | NaN | 26.273333 | NaN | NaN | NaN | NaN | NaN | 9075500.0 | NaN | NaN | NaN | NaN | NaN | NaN | 1.154463e+07 | NaN | NaN | NaN | NaN | NaN |
| 852846 | 2013-01-07 | 26.620001 | 26.740000 | 26.549999 | 26.660000 | 21.437866 | 7576200.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 | 26.688852 | NaN | NaN | NaN | NaN | NaN | 39.919745 | NaN | NaN | 26.680000 | 26.740000 | 26.670000 | NaN | NaN | NaN | NaN | NaN | 26.680000 | NaN | NaN | NaN | NaN | NaN | 21.502203 | 21.445908 | NaN | NaN | NaN | NaN | NaN | 21.453953 | NaN | NaN | NaN | NaN | NaN | 26.700001 | 25.840000 | NaN | NaN | NaN | NaN | NaN | 26.673334 | NaN | NaN | NaN | NaN | NaN | 26.830000 | 26.690001 | NaN | NaN | NaN | NaN | NaN | 26.780000 | NaN | NaN | NaN | NaN | NaN | 26.549999 | 25.780001 | NaN | NaN | NaN | NaN | NaN | 26.529999 | NaN | NaN | NaN | NaN | NaN | 7696000.0 | 17862400.0 | NaN | NaN | NaN | NaN | NaN | 8.115900e+06 | NaN | NaN | NaN | NaN | NaN |
| 852847 | 2013-01-08 | 26.520000 | 26.920000 | 26.459999 | 26.680000 | 21.453959 | 14360800.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 | 26.679078 | NaN | NaN | NaN | NaN | NaN | 55.233142 | NaN | NaN | 27.049999 | 26.660000 | 26.639999 | NaN | NaN | NaN | NaN | NaN | 26.693333 | 26.678 | NaN | NaN | NaN | NaN | 21.437866 | 21.421791 | NaN | NaN | NaN | NaN | NaN | 21.464676 | 21.452345 | NaN | NaN | NaN | NaN | 26.620001 | 26.700001 | NaN | NaN | NaN | NaN | NaN | 26.613334 | 26.476001 | NaN | NaN | NaN | NaN | 26.740000 | 26.770000 | NaN | NaN | NaN | NaN | NaN | 26.830000 | 26.79 | NaN | NaN | NaN | NaN | 26.549999 | 26.490000 | NaN | NaN | NaN | NaN | NaN | 26.519999 | 26.366 | NaN | NaN | NaN | NaN | 7576200.0 | 9075500.0 | NaN | NaN | NaN | NaN | NaN | 9.877667e+06 | 11314180.0 | NaN | NaN | NaN | NaN |
# Check if the DataFrame contains any missing values
if df.isna().any().any():
    # Remove records with missing values (NaNs come from the lag/rolling
    # warm-up periods at the start of the series) and reset the index.
    df = df.dropna().reset_index(drop=True)
    print("Missing records removed. DataFrame is now cleaned.")
else:
    df = df.copy()
    print("No missing records found. DataFrame remains unchanged.")
# Display the first few rows of the cleaned DataFrame
df.head()
Missing records removed. DataFrame is now cleaned.
| date | open | high | low | close | adj close | volume | symbol | security | gics sector | gics sub-industry | headquarters location | date added | cik | founded | ema_9 | sma_5 | sma_10 | sma_15 | sma_30 | rsi | mfi | macd | macd_signal | close_1d_next | close_1d_ago | close_3d_ago | close_5d_ago | close_7d_ago | close_14d_ago | close_21d_ago | close_28d_ago | close_3d_avg | close_5d_avg | close_7d_avg | close_10d_avg | close_15d_avg | close_30d_avg | adj close_1d_ago | adj close_3d_ago | adj close_5d_ago | adj close_7d_ago | adj close_14d_ago | adj close_21d_ago | adj close_28d_ago | adj close_3d_avg | adj close_5d_avg | adj close_7d_avg | adj close_10d_avg | adj close_15d_avg | adj close_30d_avg | open_1d_ago | open_3d_ago | open_5d_ago | open_7d_ago | open_14d_ago | open_21d_ago | open_28d_ago | open_3d_avg | open_5d_avg | open_7d_avg | open_10d_avg | open_15d_avg | open_30d_avg | high_1d_ago | high_3d_ago | high_5d_ago | high_7d_ago | high_14d_ago | high_21d_ago | high_28d_ago | high_3d_avg | high_5d_avg | high_7d_avg | high_10d_avg | high_15d_avg | high_30d_avg | low_1d_ago | low_3d_ago | low_5d_ago | low_7d_ago | low_14d_ago | low_21d_ago | low_28d_ago | low_3d_avg | low_5d_avg | low_7d_avg | low_10d_avg | low_15d_avg | low_30d_avg | volume_1d_ago | volume_3d_ago | volume_5d_ago | volume_7d_ago | volume_14d_ago | volume_21d_ago | volume_28d_ago | volume_3d_avg | volume_5d_avg | volume_7d_avg | volume_10d_avg | volume_15d_avg | volume_30d_avg | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2013-02-20 | 27.070000 | 27.150000 | 26.950001 | 27.030001 | 21.735399 | 17057200.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 | 27.307926 | 27.136 | 27.518 | 27.642000 | 27.589000 | 41.633625 | 53.176274 | -0.147786 | -0.050945 | 26.820000 | 26.959999 | 26.570000 | 27.680000 | 27.76 | 27.730000 | 28.080000 | 27.049999 | 26.903333 | 27.006 | 27.208571 | 27.426 | 27.588667 | 27.601333 | 21.679117 | 21.365499 | 22.258080 | 22.322405 | 22.298285 | 22.579723 | 21.751484 | 21.633545 | 21.716101 | 21.878994 | 22.053831 | 22.184635 | 22.194819 | 26.750000 | 26.690001 | 27.700001 | 27.799999 | 27.830000 | 27.969999 | 26.790001 | 26.886667 | 27.018 | 27.217143 | 27.386 | 27.553333 | 27.536667 | 27.190001 | 27.020000 | 27.830000 | 28.100000 | 27.980000 | 28.100000 | 27.080000 | 27.136667 | 27.248000 | 27.410000 | 27.618 | 27.779333 | 27.754667 | 26.750000 | 26.450001 | 27.270000 | 27.750000 | 27.67 | 27.820000 | 26.68 | 26.766667 | 26.842 | 27.015714 | 27.224 | 27.411333 | 27.382333 | 18297500.0 | 37728900.0 | 14931000.0 | 11159200.0 | 5800400.0 | 15906900.0 | 11671400.0 | 1.904973e+07 | 21756140.0 | 1.907480e+07 | 17005580.0 | 1.419575e+07 | 1.352419e+07 |
| 1 | 2013-02-21 | 26.990000 | 27.049999 | 26.639999 | 26.820000 | 21.566534 | 16936600.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 | 27.252312 | 27.006 | 27.426 | 27.588667 | 27.601333 | 38.257648 | 47.431888 | -0.175230 | -0.078792 | 26.770000 | 27.030001 | 26.719999 | 27.750000 | 27.75 | 27.790001 | 27.559999 | 27.309999 | 26.936666 | 26.820 | 27.075714 | 27.308 | 27.528000 | 27.606000 | 21.735399 | 21.486118 | 22.314371 | 22.314371 | 22.346525 | 22.161583 | 21.960548 | 21.660350 | 21.566534 | 21.772160 | 21.958945 | 22.135851 | 22.198572 | 27.070000 | 26.840000 | 27.740000 | 27.730000 | 27.650000 | 27.730000 | 27.129999 | 26.936666 | 26.868 | 27.111429 | 27.295 | 27.497333 | 27.552333 | 27.150000 | 27.070000 | 27.809999 | 27.799999 | 27.950001 | 28.040001 | 27.340000 | 27.130000 | 27.096000 | 27.302857 | 27.510 | 27.717333 | 27.759000 | 26.950001 | 26.600000 | 27.459999 | 27.629999 | 27.65 | 27.299999 | 27.09 | 26.780000 | 26.678 | 26.874286 | 27.108 | 27.342667 | 27.388333 | 17057200.0 | 21794500.0 | 13902600.0 | 9811900.0 | 7541300.0 | 18213200.0 | 16348500.0 | 1.743043e+07 | 22362940.0 | 2.009261e+07 | 17608410.0 | 1.493817e+07 | 1.361005e+07 |
| 2 | 2013-02-22 | 26.889999 | 27.129999 | 26.730000 | 26.770000 | 21.526327 | 16664800.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 | 27.165815 | 26.820 | 27.308 | 27.528000 | 27.606000 | 37.478423 | 48.958416 | -0.198438 | -0.104970 | 26.490000 | 26.820000 | 26.959999 | 26.570000 | 27.68 | 28.219999 | 27.790001 | 27.420000 | 26.873334 | 26.860 | 26.945714 | 27.181 | 27.460000 | 27.596667 | 21.566534 | 21.679117 | 21.365499 | 22.258080 | 22.692308 | 22.346525 | 22.049007 | 21.609420 | 21.598699 | 21.667624 | 21.856822 | 22.081171 | 22.191067 | 26.990000 | 26.750000 | 26.690001 | 27.700001 | 28.000000 | 27.500000 | 27.350000 | 26.983333 | 26.908 | 26.995714 | 27.220 | 27.446667 | 27.555667 | 27.049999 | 27.190001 | 27.020000 | 27.830000 | 28.320000 | 27.889999 | 27.540001 | 27.109999 | 27.118000 | 27.202857 | 27.415 | 27.662666 | 27.760667 | 26.639999 | 26.750000 | 26.450001 | 27.270000 | 27.93 | 27.350000 | 27.25 | 26.773333 | 26.734 | 26.797143 | 27.023 | 27.281333 | 27.390000 | 16936600.0 | 18297500.0 | 37728900.0 | 14931000.0 | 9623100.0 | 15212300.0 | 10162600.0 | 1.688620e+07 | 18150120.0 | 2.034030e+07 | 17828420.0 | 1.554640e+07 | 1.377650e+07 |
| 3 | 2013-02-25 | 26.790001 | 27.080000 | 26.480000 | 26.490000 | 21.301172 | 15527100.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 | 27.086626 | 26.860 | 27.181 | 27.460000 | 27.596667 | 33.378362 | 47.675126 | -0.235462 | -0.132994 | 26.950001 | 26.770000 | 27.030001 | 26.719999 | 27.75 | 27.879999 | 27.830000 | 27.480000 | 26.693333 | 26.814 | 26.765714 | 27.054 | 27.344667 | 27.569333 | 21.526327 | 21.735399 | 21.486118 | 22.314371 | 22.418896 | 22.378695 | 22.097254 | 21.464678 | 21.561710 | 21.522881 | 21.754699 | 21.988429 | 22.169087 | 26.889999 | 27.070000 | 26.840000 | 27.740000 | 28.010000 | 27.930000 | 27.459999 | 26.890000 | 26.898 | 26.860000 | 27.119 | 27.366000 | 27.544333 | 27.129999 | 27.150000 | 27.070000 | 27.809999 | 28.150000 | 28.030001 | 27.520000 | 27.086666 | 27.120000 | 27.098571 | 27.313 | 27.580000 | 27.752000 | 26.730000 | 26.950001 | 26.600000 | 27.459999 | 27.83 | 27.639999 | 27.17 | 26.616666 | 26.710 | 26.657143 | 26.896 | 27.184667 | 27.369667 | 16664800.0 | 17057200.0 | 21794500.0 | 13902600.0 | 8954300.0 | 14444500.0 | 8688200.0 | 1.637617e+07 | 16896640.0 | 2.057237e+07 | 18265210.0 | 1.594000e+07 | 1.374912e+07 |
| 4 | 2013-02-26 | 26.530001 | 26.980000 | 26.510000 | 26.950001 | 21.671074 | 13702900.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 | 26.967270 | 26.814 | 27.054 | 27.344667 | 27.569333 | 44.181951 | 48.178912 | -0.226841 | -0.152855 | 27.570000 | 26.490000 | 26.820000 | 26.959999 | 26.57 | 27.950001 | 27.780001 | 27.709999 | 26.736667 | 26.812 | 26.820000 | 26.974 | 27.282667 | 27.553667 | 21.301172 | 21.566534 | 21.679117 | 21.365499 | 22.475189 | 22.338484 | 22.282200 | 21.499524 | 21.560101 | 21.566535 | 21.690369 | 21.938574 | 22.156490 | 26.790001 | 26.990000 | 26.750000 | 26.690001 | 27.950001 | 27.830000 | 27.580000 | 26.736667 | 26.854 | 26.837143 | 26.999 | 27.267333 | 27.517000 | 27.080000 | 27.049999 | 27.190001 | 27.020000 | 28.110001 | 27.889999 | 27.740000 | 27.063333 | 27.077999 | 27.092857 | 27.231 | 27.502000 | 27.733333 | 26.480000 | 26.639999 | 26.750000 | 26.450001 | 27.85 | 27.690001 | 27.34 | 26.573333 | 26.662 | 26.665714 | 26.784 | 27.096667 | 27.345000 | 15527100.0 | 16936600.0 | 18297500.0 | 37728900.0 | 10961400.0 | 12066800.0 | 9863200.0 | 1.529827e+07 | 15977720.0 | 1.714009e+07 | 18654310.0 | 1.625657e+07 | 1.386713e+07 |
# Split the DataFrame into training and testing sets chronologically:
# everything before 2020 trains the models, 2020 onward evaluates them.
# A time-based split (rather than a random 70-30 split) prevents
# look-ahead leakage, since each row's features encode past prices.
train_df = df[df.date.dt.year < 2020]
test_df = df[df.date.dt.year >= 2020]
print(f"Train days: {len(train_df)}, Test days: {len(test_df)}")

# Visualize the split on the prediction target (next day's close price)
# so the train/test boundary is easy to eyeball.
fig = go.Figure()
fig.add_trace(go.Scatter(x=train_df.date, y=train_df.close_1d_next, name='Training'))
fig.add_trace(go.Scatter(x=test_df.date, y=test_df.close_1d_next, name='Test'))
fig.show()
Train days: 1729, Test days: 970
# Drop identifier/metadata columns and the raw OHLCV columns; only the
# engineered features (indicators, lags, rolling statistics) feed the models.
drop_cols1 = ['date', 'open', 'high', 'low', 'close', 'adj close', 'volume', 'symbol', 'security',
              'gics sector', 'gics sub-industry', 'headquarters location', 'date added', 'cik', 'founded']
# Use the keyword form `axis=1`: the positional axis argument to drop()
# was deprecated in pandas 1.x and removed in pandas 2.0.
train_df = train_df.drop(drop_cols1, axis=1)
test_df = test_df.drop(drop_cols1, axis=1)
# target column is next day's close price
y_train = train_df['close_1d_next'].copy()
X_train = train_df.drop(['close_1d_next'], axis=1)
# target column is next day's close price
y_test = test_df['close_1d_next'].copy()
X_test = test_df.drop(['close_1d_next'], axis=1)
# Standardize features: fit the scaler on the training set only, then apply
# the same transform to the test set to avoid data leakage.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
X_train.shape, X_train_scaled.shape, X_test.shape, X_test_scaled.shape
((1729, 87), (1729, 87), (970, 87), (970, 87))
def train_and_evaluate_models(X_train_scaled, y_train, X_test_scaled, y_test):
    """
    Train and evaluate a suite of regression models on pre-split data.

    Parameters:
    - X_train_scaled: training feature matrix (should already be standardized,
      since SVR / KNN / regularized linear models are scale-sensitive).
    - y_train: training target (next day's close price).
    - X_test_scaled: test feature matrix, transformed with the same scaler.
    - y_test: test target.

    Returns:
    - A DataFrame with one row per model — MSE, MAE, R2 Score and training
      time in seconds — sorted by R2 Score, best model first.
    """
    # Candidate regressors, all with default hyperparameters for a fair
    # first-pass comparison.
    models = {
        'Linear Regression': LinearRegression(),
        'Ridge Regression': Ridge(),
        'Lasso Regression': Lasso(),
        'Elastic Net': ElasticNet(),
        'SVR': SVR(),
        'K-Neighbors Regressor': KNeighborsRegressor(),
        'Decision Tree': DecisionTreeRegressor(),
        'Random Forest': RandomForestRegressor(),
        'Gradient Boosting': GradientBoostingRegressor(),
        'AdaBoost': AdaBoostRegressor(),
        'XGBoost': XGBRegressor(),
        # verbose=0 suppresses CatBoost's per-iteration training log.
        'CatBoost': CatBoostRegressor(verbose=0)
    }
    # Collect one metrics record per model and build the DataFrame once at
    # the end: DataFrame.append was deprecated and removed in pandas 2.0,
    # and this also guarantees the 'Training Time (s)' column exists.
    records = []
    for model_name, model in models.items():
        start_time = time.time()
        # Train the model and time the fit only (prediction excluded).
        model.fit(X_train_scaled, y_train)
        training_time = time.time() - start_time
        # Evaluate on the held-out test set.
        y_pred = model.predict(X_test_scaled)
        records.append({
            'Model': model_name,
            'Mean Squared Error': mean_squared_error(y_test, y_pred),
            'Mean Absolute Error': mean_absolute_error(y_test, y_pred),
            'R2 Score': r2_score(y_test, y_pred),
            'Training Time (s)': training_time
        })
    metrics_df = pd.DataFrame(records).sort_values(by=['R2 Score'], ascending=False)
    return metrics_df
# Run the model comparison on the STANDARDIZED feature matrices. The
# original call passed the unscaled X_train/X_test, which left the fitted
# StandardScaler unused and disadvantaged scale-sensitive models
# (SVR, KNN, and the linear/regularized family).
df_compare = train_and_evaluate_models(X_train_scaled, y_train, X_test_scaled, y_test)
Learning rate set to 0.044643 0: learn: 6.0603979 total: 63.2ms remaining: 1m 3s 1: learn: 5.8169612 total: 69.1ms remaining: 34.5s 2: learn: 5.5755845 total: 74ms remaining: 24.6s 3: learn: 5.3462380 total: 78.7ms remaining: 19.6s 4: learn: 5.1261132 total: 83.9ms remaining: 16.7s 5: learn: 4.9208602 total: 88.5ms remaining: 14.7s 6: learn: 4.7250714 total: 93.4ms remaining: 13.2s 7: learn: 4.5456470 total: 97.7ms remaining: 12.1s 8: learn: 4.3606091 total: 102ms remaining: 11.3s 9: learn: 4.1830096 total: 107ms remaining: 10.6s 10: learn: 4.0166823 total: 111ms remaining: 10s 11: learn: 3.8657076 total: 117ms remaining: 9.62s 12: learn: 3.7140111 total: 130ms remaining: 9.84s 13: learn: 3.5654097 total: 140ms remaining: 9.83s 14: learn: 3.4264460 total: 145ms remaining: 9.54s 15: learn: 3.2905647 total: 150ms remaining: 9.22s 16: learn: 3.1619168 total: 155ms remaining: 8.95s 17: learn: 3.0386276 total: 161ms remaining: 8.76s 18: learn: 2.9250019 total: 167ms remaining: 8.61s 19: learn: 2.8155633 total: 172ms remaining: 8.42s 20: learn: 2.7067078 total: 176ms remaining: 8.2s 21: learn: 2.6104041 total: 180ms remaining: 8s 22: learn: 2.5171443 total: 186ms remaining: 7.91s 23: learn: 2.4234694 total: 191ms remaining: 7.75s 24: learn: 2.3336920 total: 195ms remaining: 7.62s 25: learn: 2.2501270 total: 201ms remaining: 7.52s 26: learn: 2.1685983 total: 206ms remaining: 7.41s 27: learn: 2.0914990 total: 212ms remaining: 7.35s 28: learn: 2.0188179 total: 217ms remaining: 7.25s 29: learn: 1.9490958 total: 221ms remaining: 7.14s 30: learn: 1.8819748 total: 225ms remaining: 7.04s 31: learn: 1.8183164 total: 230ms remaining: 6.95s 32: learn: 1.7609789 total: 234ms remaining: 6.86s 33: learn: 1.7008286 total: 238ms remaining: 6.77s 34: learn: 1.6438044 total: 243ms remaining: 6.69s 35: learn: 1.5954289 total: 247ms remaining: 6.61s 36: learn: 1.5458456 total: 251ms remaining: 6.53s 37: learn: 1.4999957 total: 255ms remaining: 6.46s 38: learn: 1.4557749 total: 259ms 
remaining: 6.39s 39: learn: 1.4136122 total: 264ms remaining: 6.33s 40: learn: 1.3740245 total: 268ms remaining: 6.28s 41: learn: 1.3360847 total: 273ms remaining: 6.21s 42: learn: 1.3006229 total: 277ms remaining: 6.16s 43: learn: 1.2683312 total: 281ms remaining: 6.11s 44: learn: 1.2328651 total: 285ms remaining: 6.05s 45: learn: 1.2022101 total: 289ms remaining: 6s 46: learn: 1.1716981 total: 294ms remaining: 5.95s 47: learn: 1.1425806 total: 298ms remaining: 5.91s 48: learn: 1.1126613 total: 302ms remaining: 5.86s 49: learn: 1.0845503 total: 306ms remaining: 5.82s 50: learn: 1.0590177 total: 311ms remaining: 5.78s 51: learn: 1.0347707 total: 315ms remaining: 5.74s 52: learn: 1.0113231 total: 319ms remaining: 5.7s 53: learn: 0.9886945 total: 323ms remaining: 5.66s 54: learn: 0.9669580 total: 328ms remaining: 5.63s 55: learn: 0.9468328 total: 332ms remaining: 5.59s 56: learn: 0.9270737 total: 336ms remaining: 5.56s 57: learn: 0.9100334 total: 340ms remaining: 5.52s 58: learn: 0.8917612 total: 345ms remaining: 5.5s 59: learn: 0.8750815 total: 349ms remaining: 5.47s 60: learn: 0.8601060 total: 353ms remaining: 5.44s 61: learn: 0.8462298 total: 358ms remaining: 5.41s 62: learn: 0.8345633 total: 362ms remaining: 5.38s 63: learn: 0.8216135 total: 367ms remaining: 5.36s 64: learn: 0.8083116 total: 371ms remaining: 5.34s 65: learn: 0.7956239 total: 375ms remaining: 5.31s 66: learn: 0.7843834 total: 380ms remaining: 5.29s 67: learn: 0.7740919 total: 384ms remaining: 5.26s 68: learn: 0.7643108 total: 388ms remaining: 5.24s 69: learn: 0.7561664 total: 393ms remaining: 5.22s 70: learn: 0.7491327 total: 397ms remaining: 5.2s 71: learn: 0.7411352 total: 403ms remaining: 5.2s 72: learn: 0.7330505 total: 408ms remaining: 5.18s 73: learn: 0.7269962 total: 413ms remaining: 5.17s 74: learn: 0.7192950 total: 418ms remaining: 5.15s 75: learn: 0.7126213 total: 422ms remaining: 5.13s 76: learn: 0.7067352 total: 426ms remaining: 5.11s 77: learn: 0.7006922 total: 430ms remaining: 5.08s 
78: learn: 0.6944183 total: 435ms remaining: 5.07s 79: learn: 0.6885217 total: 439ms remaining: 5.05s 80: learn: 0.6825927 total: 443ms remaining: 5.03s 81: learn: 0.6783627 total: 448ms remaining: 5.01s 82: learn: 0.6741644 total: 452ms remaining: 5s 83: learn: 0.6696567 total: 456ms remaining: 4.98s 84: learn: 0.6646154 total: 461ms remaining: 4.96s 85: learn: 0.6599463 total: 465ms remaining: 4.95s 86: learn: 0.6560602 total: 470ms remaining: 4.93s 87: learn: 0.6536929 total: 474ms remaining: 4.91s 88: learn: 0.6496752 total: 478ms remaining: 4.89s 89: learn: 0.6462499 total: 483ms remaining: 4.89s 90: learn: 0.6428735 total: 488ms remaining: 4.88s 91: learn: 0.6397495 total: 494ms remaining: 4.87s 92: learn: 0.6367543 total: 499ms remaining: 4.86s 93: learn: 0.6337726 total: 503ms remaining: 4.84s 94: learn: 0.6308954 total: 507ms remaining: 4.83s 95: learn: 0.6280612 total: 511ms remaining: 4.81s 96: learn: 0.6250764 total: 515ms remaining: 4.79s 97: learn: 0.6223572 total: 519ms remaining: 4.78s 98: learn: 0.6205398 total: 524ms remaining: 4.77s 99: learn: 0.6186628 total: 528ms remaining: 4.75s 100: learn: 0.6154184 total: 532ms remaining: 4.74s 101: learn: 0.6124974 total: 536ms remaining: 4.72s 102: learn: 0.6095369 total: 541ms remaining: 4.71s 103: learn: 0.6081161 total: 545ms remaining: 4.69s 104: learn: 0.6061648 total: 549ms remaining: 4.68s 105: learn: 0.6037361 total: 554ms remaining: 4.67s 106: learn: 0.6018697 total: 558ms remaining: 4.66s 107: learn: 0.5996563 total: 562ms remaining: 4.64s 108: learn: 0.5987782 total: 567ms remaining: 4.63s 109: learn: 0.5962953 total: 571ms remaining: 4.62s 110: learn: 0.5944525 total: 575ms remaining: 4.61s 111: learn: 0.5925792 total: 580ms remaining: 4.59s 112: learn: 0.5908725 total: 584ms remaining: 4.59s 113: learn: 0.5892615 total: 590ms remaining: 4.58s 114: learn: 0.5876082 total: 595ms remaining: 4.58s 115: learn: 0.5856574 total: 600ms remaining: 4.58s 116: learn: 0.5839197 total: 606ms remaining: 
4.57s 117: learn: 0.5821181 total: 611ms remaining: 4.57s 118: learn: 0.5802687 total: 617ms remaining: 4.57s 119: learn: 0.5783092 total: 621ms remaining: 4.55s 120: learn: 0.5768204 total: 625ms remaining: 4.54s 121: learn: 0.5753784 total: 629ms remaining: 4.53s 122: learn: 0.5739414 total: 634ms remaining: 4.52s 123: learn: 0.5731172 total: 638ms remaining: 4.51s 124: learn: 0.5717661 total: 642ms remaining: 4.5s 125: learn: 0.5699490 total: 647ms remaining: 4.49s 126: learn: 0.5692480 total: 651ms remaining: 4.47s 127: learn: 0.5683714 total: 655ms remaining: 4.46s 128: learn: 0.5669671 total: 659ms remaining: 4.45s 129: learn: 0.5661305 total: 664ms remaining: 4.44s 130: learn: 0.5648214 total: 668ms remaining: 4.43s 131: learn: 0.5632118 total: 672ms remaining: 4.42s 132: learn: 0.5621830 total: 676ms remaining: 4.41s 133: learn: 0.5614073 total: 680ms remaining: 4.4s 134: learn: 0.5607392 total: 685ms remaining: 4.39s 135: learn: 0.5592399 total: 689ms remaining: 4.38s 136: learn: 0.5582825 total: 694ms remaining: 4.37s 137: learn: 0.5570203 total: 698ms remaining: 4.36s 138: learn: 0.5555467 total: 702ms remaining: 4.35s 139: learn: 0.5544866 total: 707ms remaining: 4.34s 140: learn: 0.5534618 total: 712ms remaining: 4.34s 141: learn: 0.5527745 total: 716ms remaining: 4.33s 142: learn: 0.5517276 total: 720ms remaining: 4.32s 143: learn: 0.5511754 total: 725ms remaining: 4.31s 144: learn: 0.5499848 total: 729ms remaining: 4.3s 145: learn: 0.5492370 total: 733ms remaining: 4.29s 146: learn: 0.5480831 total: 737ms remaining: 4.28s 147: learn: 0.5470207 total: 741ms remaining: 4.26s 148: learn: 0.5461768 total: 745ms remaining: 4.26s 149: learn: 0.5452187 total: 750ms remaining: 4.25s 150: learn: 0.5442155 total: 754ms remaining: 4.24s 151: learn: 0.5431025 total: 758ms remaining: 4.23s 152: learn: 0.5423642 total: 762ms remaining: 4.22s 153: learn: 0.5414228 total: 767ms remaining: 4.21s 154: learn: 0.5402482 total: 771ms remaining: 4.2s 155: learn: 0.5390607 
total: 775ms remaining: 4.19s 156: learn: 0.5383473 total: 779ms remaining: 4.18s 157: learn: 0.5373131 total: 784ms remaining: 4.18s 158: learn: 0.5364392 total: 789ms remaining: 4.17s 159: learn: 0.5357421 total: 793ms remaining: 4.16s 160: learn: 0.5343277 total: 798ms remaining: 4.16s 161: learn: 0.5337785 total: 803ms remaining: 4.16s 162: learn: 0.5329012 total: 809ms remaining: 4.15s 163: learn: 0.5317662 total: 814ms remaining: 4.15s 164: learn: 0.5305788 total: 819ms remaining: 4.14s 165: learn: 0.5291670 total: 823ms remaining: 4.13s 166: learn: 0.5279711 total: 827ms remaining: 4.13s 167: learn: 0.5263201 total: 831ms remaining: 4.12s 168: learn: 0.5250413 total: 836ms remaining: 4.11s 169: learn: 0.5239617 total: 840ms remaining: 4.1s 170: learn: 0.5232086 total: 844ms remaining: 4.09s 171: learn: 0.5223393 total: 849ms remaining: 4.09s 172: learn: 0.5213785 total: 853ms remaining: 4.08s 173: learn: 0.5205693 total: 858ms remaining: 4.07s 174: learn: 0.5195080 total: 862ms remaining: 4.06s 175: learn: 0.5184230 total: 866ms remaining: 4.05s 176: learn: 0.5179592 total: 870ms remaining: 4.05s 177: learn: 0.5170185 total: 874ms remaining: 4.04s 178: learn: 0.5158612 total: 879ms remaining: 4.03s 179: learn: 0.5145391 total: 883ms remaining: 4.02s 180: learn: 0.5134332 total: 887ms remaining: 4.01s 181: learn: 0.5123862 total: 891ms remaining: 4.01s 182: learn: 0.5109381 total: 896ms remaining: 4s 183: learn: 0.5094690 total: 900ms remaining: 3.99s 184: learn: 0.5087636 total: 904ms remaining: 3.98s 185: learn: 0.5082193 total: 908ms remaining: 3.97s 186: learn: 0.5077741 total: 912ms remaining: 3.97s 187: learn: 0.5064784 total: 917ms remaining: 3.96s 188: learn: 0.5053126 total: 921ms remaining: 3.95s 189: learn: 0.5045861 total: 925ms remaining: 3.94s 190: learn: 0.5035784 total: 929ms remaining: 3.94s 191: learn: 0.5025377 total: 934ms remaining: 3.93s 192: learn: 0.5017748 total: 938ms remaining: 3.92s 193: learn: 0.5008812 total: 942ms remaining: 
3.91s 194: learn: 0.5004256 total: 946ms remaining: 3.91s 195: learn: 0.4994655 total: 951ms remaining: 3.9s 196: learn: 0.4989930 total: 955ms remaining: 3.89s 197: learn: 0.4980951 total: 959ms remaining: 3.88s 198: learn: 0.4973099 total: 963ms remaining: 3.88s 199: learn: 0.4961640 total: 968ms remaining: 3.87s 200: learn: 0.4946290 total: 972ms remaining: 3.86s 201: learn: 0.4940315 total: 976ms remaining: 3.85s 202: learn: 0.4931395 total: 981ms remaining: 3.85s 203: learn: 0.4920884 total: 985ms remaining: 3.85s 204: learn: 0.4909474 total: 990ms remaining: 3.84s 205: learn: 0.4899796 total: 996ms remaining: 3.84s 206: learn: 0.4890645 total: 1s remaining: 3.83s 207: learn: 0.4882374 total: 1s remaining: 3.82s 208: learn: 0.4871172 total: 1.01s remaining: 3.82s 209: learn: 0.4861811 total: 1.01s remaining: 3.81s 210: learn: 0.4850126 total: 1.02s remaining: 3.8s 211: learn: 0.4837164 total: 1.02s remaining: 3.8s 212: learn: 0.4829656 total: 1.02s remaining: 3.79s 213: learn: 0.4819662 total: 1.03s remaining: 3.78s 214: learn: 0.4817394 total: 1.03s remaining: 3.77s 215: learn: 0.4811177 total: 1.04s remaining: 3.77s 216: learn: 0.4806224 total: 1.04s remaining: 3.76s 217: learn: 0.4800808 total: 1.05s remaining: 3.75s 218: learn: 0.4789126 total: 1.05s remaining: 3.75s 219: learn: 0.4784037 total: 1.05s remaining: 3.74s 220: learn: 0.4774369 total: 1.06s remaining: 3.74s 221: learn: 0.4764525 total: 1.06s remaining: 3.73s 222: learn: 0.4757860 total: 1.07s remaining: 3.72s 223: learn: 0.4748362 total: 1.07s remaining: 3.72s 224: learn: 0.4738011 total: 1.08s remaining: 3.71s 225: learn: 0.4732533 total: 1.08s remaining: 3.71s 226: learn: 0.4724302 total: 1.09s remaining: 3.7s 227: learn: 0.4714185 total: 1.09s remaining: 3.7s 228: learn: 0.4704901 total: 1.1s remaining: 3.69s 229: learn: 0.4696393 total: 1.1s remaining: 3.68s 230: learn: 0.4688642 total: 1.1s remaining: 3.68s 231: learn: 0.4684056 total: 1.11s remaining: 3.67s 232: learn: 0.4680263 total: 
1.11s remaining: 3.66s 233: learn: 0.4673195 total: 1.12s remaining: 3.66s 234: learn: 0.4661429 total: 1.12s remaining: 3.65s 235: learn: 0.4651517 total: 1.13s remaining: 3.65s 236: learn: 0.4643784 total: 1.13s remaining: 3.65s 237: learn: 0.4639422 total: 1.14s remaining: 3.64s 238: learn: 0.4629527 total: 1.14s remaining: 3.63s 239: learn: 0.4621494 total: 1.15s remaining: 3.63s 240: learn: 0.4611603 total: 1.15s remaining: 3.62s 241: learn: 0.4603838 total: 1.15s remaining: 3.62s 242: learn: 0.4593452 total: 1.16s remaining: 3.61s 243: learn: 0.4586434 total: 1.16s remaining: 3.6s 244: learn: 0.4577788 total: 1.17s remaining: 3.6s 245: learn: 0.4576161 total: 1.17s remaining: 3.59s 246: learn: 0.4570526 total: 1.18s remaining: 3.59s 247: learn: 0.4560632 total: 1.18s remaining: 3.59s 248: learn: 0.4552049 total: 1.19s remaining: 3.59s 249: learn: 0.4544847 total: 1.2s remaining: 3.59s 250: learn: 0.4536628 total: 1.2s remaining: 3.58s 251: learn: 0.4529050 total: 1.21s remaining: 3.58s 252: learn: 0.4521824 total: 1.21s remaining: 3.57s 253: learn: 0.4515072 total: 1.21s remaining: 3.57s 254: learn: 0.4506698 total: 1.22s remaining: 3.56s 255: learn: 0.4501205 total: 1.22s remaining: 3.56s 256: learn: 0.4493848 total: 1.23s remaining: 3.55s 257: learn: 0.4486159 total: 1.23s remaining: 3.55s 258: learn: 0.4482442 total: 1.24s remaining: 3.54s 259: learn: 0.4478827 total: 1.24s remaining: 3.54s 260: learn: 0.4473978 total: 1.25s remaining: 3.53s 261: learn: 0.4468994 total: 1.25s remaining: 3.53s 262: learn: 0.4456701 total: 1.26s remaining: 3.52s 263: learn: 0.4449110 total: 1.26s remaining: 3.52s 264: learn: 0.4442542 total: 1.27s remaining: 3.51s 265: learn: 0.4431941 total: 1.27s remaining: 3.51s 266: learn: 0.4425103 total: 1.27s remaining: 3.5s 267: learn: 0.4416661 total: 1.28s remaining: 3.49s 268: learn: 0.4411630 total: 1.28s remaining: 3.49s 269: learn: 0.4404441 total: 1.29s remaining: 3.48s 270: learn: 0.4400363 total: 1.29s remaining: 3.48s 271: 
learn: 0.4398475 total: 1.3s remaining: 3.47s 272: learn: 0.4390363 total: 1.3s remaining: 3.47s 273: learn: 0.4381161 total: 1.31s remaining: 3.46s 274: learn: 0.4374399 total: 1.31s remaining: 3.46s 275: learn: 0.4370343 total: 1.31s remaining: 3.45s 276: learn: 0.4366440 total: 1.32s remaining: 3.45s 277: learn: 0.4363497 total: 1.32s remaining: 3.44s 278: learn: 0.4360749 total: 1.33s remaining: 3.43s 279: learn: 0.4353030 total: 1.33s remaining: 3.43s 280: learn: 0.4346082 total: 1.34s remaining: 3.42s 281: learn: 0.4342305 total: 1.34s remaining: 3.42s 282: learn: 0.4338120 total: 1.34s remaining: 3.41s 283: learn: 0.4336984 total: 1.35s remaining: 3.4s 284: learn: 0.4329844 total: 1.35s remaining: 3.4s 285: learn: 0.4325585 total: 1.36s remaining: 3.39s 286: learn: 0.4316236 total: 1.36s remaining: 3.38s 287: learn: 0.4312618 total: 1.37s remaining: 3.38s 288: learn: 0.4305788 total: 1.37s remaining: 3.37s 289: learn: 0.4299323 total: 1.38s remaining: 3.37s 290: learn: 0.4294147 total: 1.38s remaining: 3.37s 291: learn: 0.4287311 total: 1.39s remaining: 3.36s 292: learn: 0.4281021 total: 1.39s remaining: 3.35s 293: learn: 0.4274824 total: 1.39s remaining: 3.35s 294: learn: 0.4266494 total: 1.4s remaining: 3.34s 295: learn: 0.4259133 total: 1.4s remaining: 3.34s 296: learn: 0.4254208 total: 1.41s remaining: 3.33s 297: learn: 0.4248581 total: 1.41s remaining: 3.33s 298: learn: 0.4244197 total: 1.42s remaining: 3.33s 299: learn: 0.4238834 total: 1.42s remaining: 3.32s 300: learn: 0.4235753 total: 1.43s remaining: 3.31s 301: learn: 0.4230278 total: 1.43s remaining: 3.31s 302: learn: 0.4223168 total: 1.44s remaining: 3.3s 303: learn: 0.4218202 total: 1.44s remaining: 3.3s 304: learn: 0.4213324 total: 1.44s remaining: 3.29s 305: learn: 0.4209713 total: 1.45s remaining: 3.29s 306: learn: 0.4206682 total: 1.45s remaining: 3.28s 307: learn: 0.4205730 total: 1.46s remaining: 3.27s 308: learn: 0.4200366 total: 1.46s remaining: 3.27s 309: learn: 0.4196622 total: 1.47s 
remaining: 3.26s 310: learn: 0.4188254 total: 1.47s remaining: 3.25s 311: learn: 0.4182222 total: 1.47s remaining: 3.25s 312: learn: 0.4181272 total: 1.48s remaining: 3.24s 313: learn: 0.4178863 total: 1.48s remaining: 3.24s 314: learn: 0.4171997 total: 1.49s remaining: 3.23s 315: learn: 0.4165719 total: 1.49s remaining: 3.23s 316: learn: 0.4158275 total: 1.49s remaining: 3.22s 317: learn: 0.4150223 total: 1.5s remaining: 3.21s 318: learn: 0.4144227 total: 1.5s remaining: 3.21s 319: learn: 0.4138124 total: 1.51s remaining: 3.2s 320: learn: 0.4135332 total: 1.51s remaining: 3.2s 321: learn: 0.4129018 total: 1.51s remaining: 3.19s 322: learn: 0.4123695 total: 1.52s remaining: 3.18s 323: learn: 0.4116624 total: 1.52s remaining: 3.18s 324: learn: 0.4108634 total: 1.53s remaining: 3.17s 325: learn: 0.4103299 total: 1.53s remaining: 3.17s 326: learn: 0.4096449 total: 1.54s remaining: 3.16s 327: learn: 0.4092467 total: 1.54s remaining: 3.16s 328: learn: 0.4090363 total: 1.54s remaining: 3.15s 329: learn: 0.4080857 total: 1.55s remaining: 3.14s 330: learn: 0.4077160 total: 1.55s remaining: 3.14s 331: learn: 0.4071226 total: 1.56s remaining: 3.13s 332: learn: 0.4067749 total: 1.56s remaining: 3.13s 333: learn: 0.4062413 total: 1.57s remaining: 3.12s 334: learn: 0.4055695 total: 1.57s remaining: 3.12s 335: learn: 0.4054942 total: 1.57s remaining: 3.11s 336: learn: 0.4047835 total: 1.58s remaining: 3.11s 337: learn: 0.4042196 total: 1.58s remaining: 3.1s 338: learn: 0.4038735 total: 1.59s remaining: 3.1s 339: learn: 0.4036386 total: 1.59s remaining: 3.1s 340: learn: 0.4033409 total: 1.6s remaining: 3.09s 341: learn: 0.4026643 total: 1.6s remaining: 3.08s 342: learn: 0.4023055 total: 1.61s remaining: 3.08s 343: learn: 0.4021658 total: 1.61s remaining: 3.07s 344: learn: 0.4018412 total: 1.62s remaining: 3.07s 345: learn: 0.4013429 total: 1.62s remaining: 3.06s 346: learn: 0.4008246 total: 1.63s remaining: 3.06s 347: learn: 0.4002585 total: 1.63s remaining: 3.05s 348: learn: 
0.3998110 total: 1.63s remaining: 3.05s 349: learn: 0.3990401 total: 1.64s remaining: 3.04s 350: learn: 0.3982868 total: 1.64s remaining: 3.04s 351: learn: 0.3977941 total: 1.65s remaining: 3.03s 352: learn: 0.3971540 total: 1.65s remaining: 3.03s 353: learn: 0.3966626 total: 1.66s remaining: 3.02s 354: learn: 0.3965989 total: 1.66s remaining: 3.01s 355: learn: 0.3962189 total: 1.66s remaining: 3.01s 356: learn: 0.3958068 total: 1.67s remaining: 3s 357: learn: 0.3953095 total: 1.67s remaining: 3s 358: learn: 0.3948418 total: 1.68s remaining: 2.99s 359: learn: 0.3946123 total: 1.68s remaining: 2.99s 360: learn: 0.3938100 total: 1.69s remaining: 2.98s 361: learn: 0.3932202 total: 1.69s remaining: 2.98s 362: learn: 0.3925485 total: 1.69s remaining: 2.97s 363: learn: 0.3918346 total: 1.7s remaining: 2.97s 364: learn: 0.3915180 total: 1.7s remaining: 2.96s 365: learn: 0.3907133 total: 1.71s remaining: 2.96s 366: learn: 0.3905787 total: 1.71s remaining: 2.95s 367: learn: 0.3900100 total: 1.72s remaining: 2.95s 368: learn: 0.3894581 total: 1.72s remaining: 2.94s 369: learn: 0.3887745 total: 1.73s remaining: 2.94s 370: learn: 0.3881815 total: 1.73s remaining: 2.93s 371: learn: 0.3877839 total: 1.73s remaining: 2.93s 372: learn: 0.3872649 total: 1.74s remaining: 2.92s 373: learn: 0.3864811 total: 1.74s remaining: 2.92s 374: learn: 0.3859586 total: 1.75s remaining: 2.91s 375: learn: 0.3857552 total: 1.75s remaining: 2.9s 376: learn: 0.3851249 total: 1.75s remaining: 2.9s 377: learn: 0.3846241 total: 1.76s remaining: 2.9s 378: learn: 0.3842221 total: 1.76s remaining: 2.89s 379: learn: 0.3840501 total: 1.77s remaining: 2.88s 380: learn: 0.3839565 total: 1.77s remaining: 2.88s 381: learn: 0.3834811 total: 1.78s remaining: 2.88s 382: learn: 0.3829302 total: 1.78s remaining: 2.87s 383: learn: 0.3824233 total: 1.79s remaining: 2.87s 384: learn: 0.3820862 total: 1.79s remaining: 2.86s 385: learn: 0.3817059 total: 1.79s remaining: 2.86s 386: learn: 0.3810940 total: 1.8s remaining: 
2.85s 387: learn: 0.3807112 total: 1.8s remaining: 2.85s 388: learn: 0.3802605 total: 1.81s remaining: 2.84s 389: learn: 0.3798039 total: 1.81s remaining: 2.83s 390: learn: 0.3793238 total: 1.82s remaining: 2.83s 391: learn: 0.3787842 total: 1.82s remaining: 2.83s 392: learn: 0.3783469 total: 1.82s remaining: 2.82s 393: learn: 0.3774757 total: 1.83s remaining: 2.81s 394: learn: 0.3770911 total: 1.83s remaining: 2.81s 395: learn: 0.3767255 total: 1.84s remaining: 2.8s 396: learn: 0.3761264 total: 1.84s remaining: 2.8s 397: learn: 0.3757592 total: 1.85s remaining: 2.79s 398: learn: 0.3751453 total: 1.85s remaining: 2.79s 399: learn: 0.3748033 total: 1.85s remaining: 2.78s 400: learn: 0.3743184 total: 1.86s remaining: 2.78s 401: learn: 0.3740077 total: 1.86s remaining: 2.77s 402: learn: 0.3733825 total: 1.87s remaining: 2.77s 403: learn: 0.3728581 total: 1.87s remaining: 2.76s 404: learn: 0.3722381 total: 1.88s remaining: 2.76s 405: learn: 0.3716600 total: 1.88s remaining: 2.75s 406: learn: 0.3709735 total: 1.89s remaining: 2.75s 407: learn: 0.3706292 total: 1.89s remaining: 2.74s 408: learn: 0.3700405 total: 1.89s remaining: 2.74s 409: learn: 0.3695724 total: 1.9s remaining: 2.73s 410: learn: 0.3688604 total: 1.9s remaining: 2.73s 411: learn: 0.3687772 total: 1.91s remaining: 2.72s 412: learn: 0.3680599 total: 1.91s remaining: 2.72s 413: learn: 0.3676867 total: 1.92s remaining: 2.71s 414: learn: 0.3673532 total: 1.92s remaining: 2.71s 415: learn: 0.3667493 total: 1.92s remaining: 2.7s 416: learn: 0.3663581 total: 1.93s remaining: 2.69s 417: learn: 0.3659273 total: 1.93s remaining: 2.69s 418: learn: 0.3653585 total: 1.94s remaining: 2.69s 419: learn: 0.3647104 total: 1.94s remaining: 2.68s 420: learn: 0.3643467 total: 1.94s remaining: 2.67s 421: learn: 0.3638706 total: 1.95s remaining: 2.67s 422: learn: 0.3637044 total: 1.95s remaining: 2.66s 423: learn: 0.3630532 total: 1.96s remaining: 2.66s 424: learn: 0.3628976 total: 1.96s remaining: 2.65s 425: learn: 0.3622257 
total: 1.97s remaining: 2.65s 426: learn: 0.3618914 total: 1.97s remaining: 2.65s 427: learn: 0.3614434 total: 1.98s remaining: 2.64s 428: learn: 0.3610056 total: 1.98s remaining: 2.64s 429: learn: 0.3603069 total: 1.99s remaining: 2.63s 430: learn: 0.3599433 total: 1.99s remaining: 2.63s 431: learn: 0.3598531 total: 1.99s remaining: 2.62s 432: learn: 0.3597998 total: 2s remaining: 2.62s 433: learn: 0.3592574 total: 2s remaining: 2.61s 434: learn: 0.3586937 total: 2.01s remaining: 2.6s 435: learn: 0.3582985 total: 2.01s remaining: 2.6s 436: learn: 0.3579804 total: 2.01s remaining: 2.6s 437: learn: 0.3576664 total: 2.02s remaining: 2.59s 438: learn: 0.3572953 total: 2.02s remaining: 2.58s 439: learn: 0.3568969 total: 2.03s remaining: 2.58s 440: learn: 0.3562306 total: 2.03s remaining: 2.58s 441: learn: 0.3555624 total: 2.04s remaining: 2.57s 442: learn: 0.3550610 total: 2.04s remaining: 2.56s 443: learn: 0.3545855 total: 2.04s remaining: 2.56s 444: learn: 0.3541245 total: 2.04s remaining: 2.55s 445: learn: 0.3537684 total: 2.05s remaining: 2.54s 446: learn: 0.3534726 total: 2.05s remaining: 2.54s 447: learn: 0.3533520 total: 2.06s remaining: 2.53s 448: learn: 0.3530596 total: 2.06s remaining: 2.53s 449: learn: 0.3525138 total: 2.06s remaining: 2.52s 450: learn: 0.3520520 total: 2.07s remaining: 2.52s 451: learn: 0.3517294 total: 2.07s remaining: 2.51s 452: learn: 0.3516567 total: 2.08s remaining: 2.5s 453: learn: 0.3513043 total: 2.08s remaining: 2.5s 454: learn: 0.3509406 total: 2.08s remaining: 2.49s 455: learn: 0.3504919 total: 2.08s remaining: 2.49s 456: learn: 0.3498914 total: 2.09s remaining: 2.48s 457: learn: 0.3494417 total: 2.09s remaining: 2.48s 458: learn: 0.3490944 total: 2.1s remaining: 2.47s 459: learn: 0.3487102 total: 2.1s remaining: 2.46s 460: learn: 0.3484557 total: 2.1s remaining: 2.46s 461: learn: 0.3478789 total: 2.11s remaining: 2.46s 462: learn: 0.3474275 total: 2.11s remaining: 2.45s 463: learn: 0.3468342 total: 2.12s remaining: 2.45s 464: 
learn: 0.3463040 total: 2.12s remaining: 2.44s 465: learn: 0.3458213 total: 2.13s remaining: 2.44s 466: learn: 0.3451803 total: 2.13s remaining: 2.43s 467: learn: 0.3447292 total: 2.14s remaining: 2.43s 468: learn: 0.3443530 total: 2.14s remaining: 2.42s 469: learn: 0.3437850 total: 2.15s remaining: 2.42s 470: learn: 0.3433565 total: 2.15s remaining: 2.41s 471: learn: 0.3430048 total: 2.15s remaining: 2.41s 472: learn: 0.3425770 total: 2.16s remaining: 2.4s 473: learn: 0.3419536 total: 2.16s remaining: 2.4s 474: learn: 0.3414171 total: 2.17s remaining: 2.4s 475: learn: 0.3409048 total: 2.17s remaining: 2.39s 476: learn: 0.3406389 total: 2.18s remaining: 2.39s 477: learn: 0.3402231 total: 2.18s remaining: 2.38s 478: learn: 0.3397786 total: 2.19s remaining: 2.38s 479: learn: 0.3396398 total: 2.19s remaining: 2.37s 480: learn: 0.3395825 total: 2.19s remaining: 2.37s 481: learn: 0.3391371 total: 2.2s remaining: 2.36s 482: learn: 0.3386234 total: 2.2s remaining: 2.36s 483: learn: 0.3382257 total: 2.21s remaining: 2.35s 484: learn: 0.3378495 total: 2.21s remaining: 2.35s 485: learn: 0.3375195 total: 2.22s remaining: 2.34s 486: learn: 0.3371611 total: 2.22s remaining: 2.34s 487: learn: 0.3367422 total: 2.22s remaining: 2.33s 488: learn: 0.3363054 total: 2.23s remaining: 2.33s 489: learn: 0.3359786 total: 2.23s remaining: 2.32s 490: learn: 0.3356998 total: 2.24s remaining: 2.32s 491: learn: 0.3354453 total: 2.24s remaining: 2.31s 492: learn: 0.3352187 total: 2.25s remaining: 2.31s 493: learn: 0.3350124 total: 2.25s remaining: 2.31s 494: learn: 0.3346929 total: 2.25s remaining: 2.3s 495: learn: 0.3342032 total: 2.26s remaining: 2.3s 496: learn: 0.3336972 total: 2.26s remaining: 2.29s 497: learn: 0.3336357 total: 2.27s remaining: 2.29s 498: learn: 0.3333691 total: 2.27s remaining: 2.28s 499: learn: 0.3329063 total: 2.28s remaining: 2.28s 500: learn: 0.3326459 total: 2.28s remaining: 2.27s 501: learn: 0.3323511 total: 2.28s remaining: 2.27s 502: learn: 0.3319477 total: 2.29s 
remaining: 2.26s 503: learn: 0.3317364 total: 2.29s remaining: 2.26s 504: learn: 0.3312039 total: 2.3s remaining: 2.25s 505: learn: 0.3305307 total: 2.3s remaining: 2.25s 506: learn: 0.3300698 total: 2.31s remaining: 2.24s 507: learn: 0.3296915 total: 2.31s remaining: 2.24s 508: learn: 0.3294125 total: 2.31s remaining: 2.23s 509: learn: 0.3291011 total: 2.32s remaining: 2.23s 510: learn: 0.3289021 total: 2.32s remaining: 2.22s 511: learn: 0.3285137 total: 2.33s remaining: 2.22s 512: learn: 0.3278964 total: 2.33s remaining: 2.21s 513: learn: 0.3274555 total: 2.34s remaining: 2.21s 514: learn: 0.3270691 total: 2.34s remaining: 2.2s 515: learn: 0.3268052 total: 2.35s remaining: 2.2s 516: learn: 0.3263295 total: 2.35s remaining: 2.19s 517: learn: 0.3258524 total: 2.35s remaining: 2.19s 518: learn: 0.3255794 total: 2.36s remaining: 2.19s 519: learn: 0.3251552 total: 2.36s remaining: 2.18s 520: learn: 0.3247569 total: 2.37s remaining: 2.18s 521: learn: 0.3242420 total: 2.37s remaining: 2.17s 522: learn: 0.3240746 total: 2.38s remaining: 2.17s 523: learn: 0.3239854 total: 2.38s remaining: 2.16s 524: learn: 0.3239415 total: 2.38s remaining: 2.16s 525: learn: 0.3236346 total: 2.39s remaining: 2.15s 526: learn: 0.3233439 total: 2.39s remaining: 2.15s 527: learn: 0.3232261 total: 2.4s remaining: 2.14s 528: learn: 0.3226489 total: 2.4s remaining: 2.14s 529: learn: 0.3221208 total: 2.41s remaining: 2.13s 530: learn: 0.3217815 total: 2.41s remaining: 2.13s 531: learn: 0.3216486 total: 2.42s remaining: 2.13s 532: learn: 0.3213358 total: 2.42s remaining: 2.12s 533: learn: 0.3211304 total: 2.43s remaining: 2.12s 534: learn: 0.3210996 total: 2.43s remaining: 2.11s 535: learn: 0.3207515 total: 2.44s remaining: 2.11s 536: learn: 0.3204738 total: 2.44s remaining: 2.1s 537: learn: 0.3198882 total: 2.44s remaining: 2.1s 538: learn: 0.3195154 total: 2.45s remaining: 2.09s 539: learn: 0.3192952 total: 2.45s remaining: 2.09s 540: learn: 0.3190280 total: 2.46s remaining: 2.08s 541: learn: 
0.3187129 total: 2.46s remaining: 2.08s 542: learn: 0.3181960 total: 2.46s remaining: 2.07s 543: learn: 0.3176606 total: 2.47s remaining: 2.07s 544: learn: 0.3170949 total: 2.47s remaining: 2.06s 545: learn: 0.3167459 total: 2.48s remaining: 2.06s 546: learn: 0.3163222 total: 2.48s remaining: 2.06s 547: learn: 0.3161420 total: 2.49s remaining: 2.05s 548: learn: 0.3157992 total: 2.49s remaining: 2.05s 549: learn: 0.3152920 total: 2.5s remaining: 2.04s 550: learn: 0.3148228 total: 2.5s remaining: 2.04s 551: learn: 0.3145167 total: 2.5s remaining: 2.03s 552: learn: 0.3139810 total: 2.51s remaining: 2.03s 553: learn: 0.3133923 total: 2.51s remaining: 2.02s 554: learn: 0.3129725 total: 2.52s remaining: 2.02s 555: learn: 0.3125777 total: 2.52s remaining: 2.01s 556: learn: 0.3123591 total: 2.53s remaining: 2.01s 557: learn: 0.3118976 total: 2.53s remaining: 2s 558: learn: 0.3117266 total: 2.54s remaining: 2s 559: learn: 0.3114944 total: 2.54s remaining: 2s 560: learn: 0.3112216 total: 2.54s remaining: 1.99s 561: learn: 0.3108621 total: 2.55s remaining: 1.99s 562: learn: 0.3104885 total: 2.55s remaining: 1.98s 563: learn: 0.3103672 total: 2.56s remaining: 1.98s 564: learn: 0.3099546 total: 2.56s remaining: 1.97s 565: learn: 0.3094861 total: 2.57s remaining: 1.97s 566: learn: 0.3091734 total: 2.57s remaining: 1.96s 567: learn: 0.3089148 total: 2.58s remaining: 1.96s 568: learn: 0.3086673 total: 2.58s remaining: 1.95s 569: learn: 0.3082695 total: 2.58s remaining: 1.95s 570: learn: 0.3078891 total: 2.59s remaining: 1.94s 571: learn: 0.3074927 total: 2.59s remaining: 1.94s 572: learn: 0.3069862 total: 2.6s remaining: 1.94s 573: learn: 0.3064039 total: 2.6s remaining: 1.93s 574: learn: 0.3061550 total: 2.6s remaining: 1.93s 575: learn: 0.3056908 total: 2.61s remaining: 1.92s 576: learn: 0.3052593 total: 2.61s remaining: 1.92s 577: learn: 0.3049534 total: 2.62s remaining: 1.91s 578: learn: 0.3044688 total: 2.62s remaining: 1.91s 579: learn: 0.3041772 total: 2.63s remaining: 1.9s 
580: learn: 0.3037533 total: 2.63s remaining: 1.9s 581: learn: 0.3035700 total: 2.64s remaining: 1.89s 582: learn: 0.3031629 total: 2.64s remaining: 1.89s 583: learn: 0.3028838 total: 2.64s remaining: 1.88s 584: learn: 0.3023030 total: 2.65s remaining: 1.88s 585: learn: 0.3019222 total: 2.65s remaining: 1.87s 586: learn: 0.3015915 total: 2.66s remaining: 1.87s 587: learn: 0.3015704 total: 2.66s remaining: 1.86s 588: learn: 0.3009768 total: 2.67s remaining: 1.86s 589: learn: 0.3008180 total: 2.67s remaining: 1.85s 590: learn: 0.3005118 total: 2.67s remaining: 1.85s 591: learn: 0.3002814 total: 2.68s remaining: 1.85s 592: learn: 0.3000431 total: 2.68s remaining: 1.84s 593: learn: 0.2997323 total: 2.69s remaining: 1.84s 594: learn: 0.2994389 total: 2.69s remaining: 1.83s 595: learn: 0.2993389 total: 2.7s remaining: 1.83s 596: learn: 0.2988775 total: 2.7s remaining: 1.82s 597: learn: 0.2987598 total: 2.7s remaining: 1.82s 598: learn: 0.2984033 total: 2.71s remaining: 1.81s 599: learn: 0.2978245 total: 2.71s remaining: 1.81s 600: learn: 0.2974830 total: 2.72s remaining: 1.8s 601: learn: 0.2969405 total: 2.72s remaining: 1.8s 602: learn: 0.2966278 total: 2.73s remaining: 1.79s 603: learn: 0.2962533 total: 2.73s remaining: 1.79s 604: learn: 0.2959855 total: 2.73s remaining: 1.78s 605: learn: 0.2956097 total: 2.74s remaining: 1.78s 606: learn: 0.2953299 total: 2.75s remaining: 1.78s 607: learn: 0.2951260 total: 2.75s remaining: 1.77s 608: learn: 0.2947087 total: 2.75s remaining: 1.77s 609: learn: 0.2944823 total: 2.76s remaining: 1.76s 610: learn: 0.2943654 total: 2.76s remaining: 1.76s 611: learn: 0.2941610 total: 2.77s remaining: 1.75s 612: learn: 0.2938108 total: 2.77s remaining: 1.75s 613: learn: 0.2934358 total: 2.78s remaining: 1.75s 614: learn: 0.2931197 total: 2.78s remaining: 1.74s 615: learn: 0.2930426 total: 2.79s remaining: 1.74s 616: learn: 0.2928295 total: 2.79s remaining: 1.73s 617: learn: 0.2924921 total: 2.79s remaining: 1.73s 618: learn: 0.2921751 total: 
2.8s remaining: 1.72s 619: learn: 0.2918991 total: 2.8s remaining: 1.72s 620: learn: 0.2913899 total: 2.81s remaining: 1.71s 621: learn: 0.2908501 total: 2.81s remaining: 1.71s 622: learn: 0.2905626 total: 2.82s remaining: 1.7s 623: learn: 0.2902904 total: 2.82s remaining: 1.7s 624: learn: 0.2900379 total: 2.83s remaining: 1.7s 625: learn: 0.2896903 total: 2.83s remaining: 1.69s 626: learn: 0.2892638 total: 2.83s remaining: 1.69s 627: learn: 0.2889578 total: 2.84s remaining: 1.68s 628: learn: 0.2887475 total: 2.84s remaining: 1.68s 629: learn: 0.2883429 total: 2.85s remaining: 1.67s 630: learn: 0.2881113 total: 2.85s remaining: 1.67s 631: learn: 0.2876183 total: 2.85s remaining: 1.66s 632: learn: 0.2875565 total: 2.86s remaining: 1.66s 633: learn: 0.2871677 total: 2.86s remaining: 1.65s 634: learn: 0.2868290 total: 2.87s remaining: 1.65s 635: learn: 0.2866803 total: 2.87s remaining: 1.64s 636: learn: 0.2864461 total: 2.88s remaining: 1.64s 637: learn: 0.2861437 total: 2.88s remaining: 1.64s 638: learn: 0.2858220 total: 2.89s remaining: 1.63s 639: learn: 0.2856065 total: 2.89s remaining: 1.63s 640: learn: 0.2853185 total: 2.89s remaining: 1.62s 641: learn: 0.2849693 total: 2.9s remaining: 1.62s 642: learn: 0.2844873 total: 2.9s remaining: 1.61s 643: learn: 0.2841484 total: 2.91s remaining: 1.61s 644: learn: 0.2838365 total: 2.91s remaining: 1.6s 645: learn: 0.2835952 total: 2.92s remaining: 1.6s 646: learn: 0.2831927 total: 2.92s remaining: 1.59s 647: learn: 0.2827722 total: 2.92s remaining: 1.59s 648: learn: 0.2824569 total: 2.93s remaining: 1.58s 649: learn: 0.2821450 total: 2.93s remaining: 1.58s 650: learn: 0.2820499 total: 2.94s remaining: 1.57s 651: learn: 0.2818476 total: 2.94s remaining: 1.57s 652: learn: 0.2815995 total: 2.95s remaining: 1.57s 653: learn: 0.2812118 total: 2.95s remaining: 1.56s 654: learn: 0.2809202 total: 2.96s remaining: 1.56s 655: learn: 0.2806815 total: 2.96s remaining: 1.55s 656: learn: 0.2802227 total: 2.96s remaining: 1.55s 657: 
learn: 0.2800624 total: 2.97s remaining: 1.54s 658: learn: 0.2795849 total: 2.97s remaining: 1.54s 659: learn: 0.2793078 total: 2.98s remaining: 1.53s 660: learn: 0.2791128 total: 2.98s remaining: 1.53s 661: learn: 0.2788399 total: 2.99s remaining: 1.52s 662: learn: 0.2788037 total: 2.99s remaining: 1.52s 663: learn: 0.2786668 total: 3s remaining: 1.51s 664: learn: 0.2784946 total: 3s remaining: 1.51s 665: learn: 0.2780830 total: 3s remaining: 1.51s 666: learn: 0.2778176 total: 3.01s remaining: 1.5s 667: learn: 0.2775362 total: 3.01s remaining: 1.5s 668: learn: 0.2771948 total: 3.02s remaining: 1.49s 669: learn: 0.2770151 total: 3.02s remaining: 1.49s 670: learn: 0.2766705 total: 3.03s remaining: 1.48s 671: learn: 0.2761992 total: 3.03s remaining: 1.48s 672: learn: 0.2761667 total: 3.04s remaining: 1.48s 673: learn: 0.2758894 total: 3.04s remaining: 1.47s 674: learn: 0.2754654 total: 3.04s remaining: 1.47s 675: learn: 0.2751694 total: 3.05s remaining: 1.46s 676: learn: 0.2747818 total: 3.05s remaining: 1.46s 677: learn: 0.2743284 total: 3.06s remaining: 1.45s 678: learn: 0.2738872 total: 3.06s remaining: 1.45s 679: learn: 0.2735984 total: 3.07s remaining: 1.44s 680: learn: 0.2732869 total: 3.07s remaining: 1.44s 681: learn: 0.2730972 total: 3.07s remaining: 1.43s 682: learn: 0.2728736 total: 3.08s remaining: 1.43s 683: learn: 0.2726755 total: 3.08s remaining: 1.42s 684: learn: 0.2723927 total: 3.09s remaining: 1.42s 685: learn: 0.2719884 total: 3.09s remaining: 1.42s 686: learn: 0.2715762 total: 3.1s remaining: 1.41s 687: learn: 0.2713579 total: 3.1s remaining: 1.41s 688: learn: 0.2711750 total: 3.1s remaining: 1.4s 689: learn: 0.2709947 total: 3.11s remaining: 1.4s 690: learn: 0.2707097 total: 3.11s remaining: 1.39s 691: learn: 0.2704260 total: 3.12s remaining: 1.39s 692: learn: 0.2701645 total: 3.12s remaining: 1.38s 693: learn: 0.2698393 total: 3.13s remaining: 1.38s 694: learn: 0.2695105 total: 3.13s remaining: 1.37s 695: learn: 0.2691283 total: 3.14s 
remaining: 1.37s 696: learn: 0.2688368 total: 3.14s remaining: 1.36s 697: learn: 0.2683466 total: 3.15s remaining: 1.36s 698: learn: 0.2680603 total: 3.15s remaining: 1.36s 699: learn: 0.2677699 total: 3.15s remaining: 1.35s 700: learn: 0.2674562 total: 3.16s remaining: 1.35s 701: learn: 0.2671464 total: 3.16s remaining: 1.34s 702: learn: 0.2668508 total: 3.17s remaining: 1.34s 703: learn: 0.2666627 total: 3.17s remaining: 1.33s 704: learn: 0.2663736 total: 3.18s remaining: 1.33s 705: learn: 0.2660888 total: 3.18s remaining: 1.32s 706: learn: 0.2658267 total: 3.18s remaining: 1.32s 707: learn: 0.2656145 total: 3.19s remaining: 1.31s 708: learn: 0.2654194 total: 3.19s remaining: 1.31s 709: learn: 0.2651461 total: 3.2s remaining: 1.3s 710: learn: 0.2647736 total: 3.2s remaining: 1.3s 711: learn: 0.2644030 total: 3.21s remaining: 1.3s 712: learn: 0.2641493 total: 3.21s remaining: 1.29s 713: learn: 0.2636034 total: 3.21s remaining: 1.29s 714: learn: 0.2633467 total: 3.22s remaining: 1.28s 715: learn: 0.2630884 total: 3.22s remaining: 1.28s 716: learn: 0.2627231 total: 3.23s remaining: 1.27s 717: learn: 0.2623737 total: 3.23s remaining: 1.27s 718: learn: 0.2620890 total: 3.23s remaining: 1.26s 719: learn: 0.2619557 total: 3.24s remaining: 1.26s 720: learn: 0.2615330 total: 3.25s remaining: 1.25s 721: learn: 0.2613678 total: 3.25s remaining: 1.25s 722: learn: 0.2609768 total: 3.25s remaining: 1.25s 723: learn: 0.2607843 total: 3.26s remaining: 1.24s 724: learn: 0.2604017 total: 3.26s remaining: 1.24s 725: learn: 0.2601656 total: 3.27s remaining: 1.23s 726: learn: 0.2598170 total: 3.27s remaining: 1.23s 727: learn: 0.2596272 total: 3.27s remaining: 1.22s 728: learn: 0.2594368 total: 3.28s remaining: 1.22s 729: learn: 0.2591430 total: 3.28s remaining: 1.21s 730: learn: 0.2588290 total: 3.29s remaining: 1.21s 731: learn: 0.2585789 total: 3.29s remaining: 1.21s 732: learn: 0.2584455 total: 3.3s remaining: 1.2s 733: learn: 0.2584104 total: 3.3s remaining: 1.2s 734: learn: 
0.2582152 total: 3.3s remaining: 1.19s 735: learn: 0.2581397 total: 3.31s remaining: 1.19s 736: learn: 0.2578583 total: 3.31s remaining: 1.18s 737: learn: 0.2576197 total: 3.32s remaining: 1.18s 738: learn: 0.2573741 total: 3.32s remaining: 1.17s 739: learn: 0.2571082 total: 3.33s remaining: 1.17s 740: learn: 0.2568704 total: 3.33s remaining: 1.16s 741: learn: 0.2566573 total: 3.33s remaining: 1.16s 742: learn: 0.2564107 total: 3.34s remaining: 1.16s 743: learn: 0.2561631 total: 3.35s remaining: 1.15s 744: learn: 0.2561312 total: 3.35s remaining: 1.15s 745: learn: 0.2557099 total: 3.36s remaining: 1.14s 746: learn: 0.2554609 total: 3.36s remaining: 1.14s 747: learn: 0.2552453 total: 3.36s remaining: 1.13s 748: learn: 0.2549054 total: 3.37s remaining: 1.13s 749: learn: 0.2546371 total: 3.37s remaining: 1.12s 750: learn: 0.2543651 total: 3.38s remaining: 1.12s 751: learn: 0.2537761 total: 3.38s remaining: 1.11s 752: learn: 0.2535814 total: 3.39s remaining: 1.11s 753: learn: 0.2531956 total: 3.39s remaining: 1.11s 754: learn: 0.2531389 total: 3.39s remaining: 1.1s 755: learn: 0.2529298 total: 3.4s remaining: 1.1s 756: learn: 0.2527376 total: 3.4s remaining: 1.09s 757: learn: 0.2524734 total: 3.41s remaining: 1.09s 758: learn: 0.2521595 total: 3.41s remaining: 1.08s 759: learn: 0.2519621 total: 3.42s remaining: 1.08s 760: learn: 0.2516846 total: 3.42s remaining: 1.07s 761: learn: 0.2512891 total: 3.42s remaining: 1.07s 762: learn: 0.2510728 total: 3.43s remaining: 1.06s 763: learn: 0.2507176 total: 3.43s remaining: 1.06s 764: learn: 0.2503362 total: 3.44s remaining: 1.05s 765: learn: 0.2502037 total: 3.44s remaining: 1.05s 766: learn: 0.2498936 total: 3.45s remaining: 1.05s 767: learn: 0.2496633 total: 3.45s remaining: 1.04s 768: learn: 0.2493097 total: 3.46s remaining: 1.04s 769: learn: 0.2489150 total: 3.46s remaining: 1.03s 770: learn: 0.2486851 total: 3.46s remaining: 1.03s 771: learn: 0.2483194 total: 3.47s remaining: 1.02s 772: learn: 0.2479421 total: 3.47s 
remaining: 1.02s 773: learn: 0.2478561 total: 3.48s remaining: 1.01s 774: learn: 0.2475209 total: 3.48s remaining: 1.01s 775: learn: 0.2471628 total: 3.48s remaining: 1.01s 776: learn: 0.2469314 total: 3.49s remaining: 1s 777: learn: 0.2466425 total: 3.49s remaining: 997ms 778: learn: 0.2465065 total: 3.5s remaining: 993ms 779: learn: 0.2461220 total: 3.5s remaining: 988ms 780: learn: 0.2458115 total: 3.51s remaining: 983ms 781: learn: 0.2456707 total: 3.51s remaining: 979ms 782: learn: 0.2454929 total: 3.52s remaining: 974ms 783: learn: 0.2451706 total: 3.52s remaining: 970ms 784: learn: 0.2448523 total: 3.52s remaining: 965ms 785: learn: 0.2445772 total: 3.53s remaining: 961ms 786: learn: 0.2443875 total: 3.53s remaining: 956ms 787: learn: 0.2441339 total: 3.54s remaining: 952ms 788: learn: 0.2438773 total: 3.54s remaining: 948ms 789: learn: 0.2438140 total: 3.55s remaining: 943ms 790: learn: 0.2434810 total: 3.55s remaining: 939ms 791: learn: 0.2429906 total: 3.56s remaining: 934ms 792: learn: 0.2426931 total: 3.56s remaining: 930ms 793: learn: 0.2424043 total: 3.56s remaining: 925ms 794: learn: 0.2421210 total: 3.57s remaining: 920ms 795: learn: 0.2419921 total: 3.57s remaining: 916ms 796: learn: 0.2418543 total: 3.58s remaining: 911ms 797: learn: 0.2418231 total: 3.58s remaining: 907ms 798: learn: 0.2415865 total: 3.59s remaining: 902ms 799: learn: 0.2412327 total: 3.59s remaining: 898ms 800: learn: 0.2410130 total: 3.6s remaining: 893ms 801: learn: 0.2406898 total: 3.6s remaining: 889ms 802: learn: 0.2403997 total: 3.6s remaining: 884ms 803: learn: 0.2403092 total: 3.61s remaining: 880ms 804: learn: 0.2399602 total: 3.61s remaining: 875ms 805: learn: 0.2397046 total: 3.62s remaining: 870ms 806: learn: 0.2395811 total: 3.62s remaining: 866ms 807: learn: 0.2393554 total: 3.62s remaining: 861ms 808: learn: 0.2389797 total: 3.63s remaining: 857ms 809: learn: 0.2386822 total: 3.63s remaining: 852ms 810: learn: 0.2385272 total: 3.64s remaining: 848ms 811: learn: 
0.2383253 total: 3.64s remaining: 843ms 812: learn: 0.2382769 total: 3.65s remaining: 839ms 813: learn: 0.2380034 total: 3.65s remaining: 834ms 814: learn: 0.2378123 total: 3.65s remaining: 830ms 815: learn: 0.2376812 total: 3.66s remaining: 825ms 816: learn: 0.2375900 total: 3.66s remaining: 821ms 817: learn: 0.2373746 total: 3.67s remaining: 817ms 818: learn: 0.2368950 total: 3.67s remaining: 812ms 819: learn: 0.2365626 total: 3.68s remaining: 808ms 820: learn: 0.2364160 total: 3.68s remaining: 803ms 821: learn: 0.2362066 total: 3.69s remaining: 798ms 822: learn: 0.2357433 total: 3.69s remaining: 794ms 823: learn: 0.2355107 total: 3.69s remaining: 789ms 824: learn: 0.2353031 total: 3.7s remaining: 785ms 825: learn: 0.2349189 total: 3.7s remaining: 780ms 826: learn: 0.2345722 total: 3.71s remaining: 776ms 827: learn: 0.2342914 total: 3.71s remaining: 771ms 828: learn: 0.2340122 total: 3.72s remaining: 767ms 829: learn: 0.2337871 total: 3.72s remaining: 762ms 830: learn: 0.2337141 total: 3.73s remaining: 758ms 831: learn: 0.2334017 total: 3.73s remaining: 754ms 832: learn: 0.2331498 total: 3.74s remaining: 749ms 833: learn: 0.2329182 total: 3.74s remaining: 745ms 834: learn: 0.2326496 total: 3.75s remaining: 740ms 835: learn: 0.2323721 total: 3.75s remaining: 736ms 836: learn: 0.2321663 total: 3.75s remaining: 731ms 837: learn: 0.2317915 total: 3.76s remaining: 727ms 838: learn: 0.2317042 total: 3.76s remaining: 722ms 839: learn: 0.2315415 total: 3.77s remaining: 718ms 840: learn: 0.2312144 total: 3.77s remaining: 713ms 841: learn: 0.2309861 total: 3.78s remaining: 709ms 842: learn: 0.2308992 total: 3.78s remaining: 704ms 843: learn: 0.2307248 total: 3.78s remaining: 699ms 844: learn: 0.2304123 total: 3.79s remaining: 695ms 845: learn: 0.2302900 total: 3.79s remaining: 690ms 846: learn: 0.2299298 total: 3.8s remaining: 686ms 847: learn: 0.2297649 total: 3.8s remaining: 681ms 848: learn: 0.2296314 total: 3.81s remaining: 677ms 849: learn: 0.2292939 total: 3.81s 
remaining: 672ms 850: learn: 0.2291602 total: 3.81s remaining: 668ms 851: learn: 0.2288865 total: 3.82s remaining: 663ms 852: learn: 0.2286853 total: 3.82s remaining: 659ms 853: learn: 0.2285428 total: 3.83s remaining: 654ms 854: learn: 0.2284197 total: 3.83s remaining: 650ms 855: learn: 0.2282277 total: 3.83s remaining: 645ms 856: learn: 0.2278907 total: 3.84s remaining: 641ms 857: learn: 0.2277383 total: 3.84s remaining: 636ms 858: learn: 0.2275067 total: 3.85s remaining: 632ms 859: learn: 0.2272363 total: 3.85s remaining: 627ms 860: learn: 0.2269635 total: 3.86s remaining: 623ms 861: learn: 0.2265979 total: 3.86s remaining: 618ms 862: learn: 0.2264905 total: 3.87s remaining: 614ms 863: learn: 0.2262386 total: 3.87s remaining: 609ms 864: learn: 0.2260311 total: 3.87s remaining: 605ms 865: learn: 0.2258230 total: 3.88s remaining: 600ms 866: learn: 0.2255610 total: 3.88s remaining: 596ms 867: learn: 0.2254607 total: 3.89s remaining: 591ms 868: learn: 0.2250433 total: 3.89s remaining: 587ms 869: learn: 0.2249648 total: 3.9s remaining: 582ms 870: learn: 0.2247922 total: 3.9s remaining: 578ms 871: learn: 0.2244233 total: 3.9s remaining: 573ms 872: learn: 0.2242406 total: 3.91s remaining: 569ms 873: learn: 0.2240229 total: 3.91s remaining: 564ms 874: learn: 0.2238258 total: 3.92s remaining: 560ms 875: learn: 0.2237571 total: 3.92s remaining: 555ms 876: learn: 0.2235548 total: 3.93s remaining: 551ms 877: learn: 0.2234135 total: 3.93s remaining: 546ms 878: learn: 0.2230922 total: 3.94s remaining: 542ms 879: learn: 0.2226509 total: 3.94s remaining: 537ms 880: learn: 0.2224404 total: 3.94s remaining: 533ms 881: learn: 0.2223705 total: 3.95s remaining: 528ms 882: learn: 0.2223260 total: 3.95s remaining: 524ms 883: learn: 0.2220318 total: 3.96s remaining: 519ms 884: learn: 0.2218552 total: 3.96s remaining: 515ms 885: learn: 0.2217687 total: 3.97s remaining: 510ms 886: learn: 0.2214940 total: 3.97s remaining: 506ms 887: learn: 0.2213258 total: 3.98s remaining: 501ms 888: 
learn: 0.2212884 total: 3.98s remaining: 497ms 889: learn: 0.2210636 total: 3.98s remaining: 493ms 890: learn: 0.2208620 total: 3.99s remaining: 488ms 891: learn: 0.2207268 total: 4s remaining: 484ms 892: learn: 0.2205061 total: 4s remaining: 479ms 893: learn: 0.2203278 total: 4s remaining: 475ms 894: learn: 0.2200449 total: 4.01s remaining: 470ms 895: learn: 0.2197147 total: 4.01s remaining: 466ms 896: learn: 0.2195113 total: 4.02s remaining: 461ms 897: learn: 0.2192671 total: 4.02s remaining: 457ms 898: learn: 0.2192258 total: 4.03s remaining: 452ms 899: learn: 0.2189515 total: 4.03s remaining: 448ms 900: learn: 0.2186505 total: 4.03s remaining: 443ms 901: learn: 0.2184193 total: 4.04s remaining: 439ms 902: learn: 0.2183672 total: 4.04s remaining: 434ms 903: learn: 0.2181273 total: 4.05s remaining: 430ms 904: learn: 0.2178674 total: 4.05s remaining: 425ms 905: learn: 0.2176005 total: 4.05s remaining: 421ms 906: learn: 0.2174875 total: 4.06s remaining: 416ms 907: learn: 0.2171777 total: 4.06s remaining: 412ms 908: learn: 0.2169544 total: 4.07s remaining: 407ms 909: learn: 0.2167341 total: 4.07s remaining: 403ms 910: learn: 0.2166133 total: 4.08s remaining: 398ms 911: learn: 0.2164535 total: 4.08s remaining: 394ms 912: learn: 0.2164128 total: 4.08s remaining: 389ms 913: learn: 0.2162686 total: 4.09s remaining: 385ms 914: learn: 0.2160413 total: 4.09s remaining: 380ms 915: learn: 0.2158222 total: 4.1s remaining: 376ms 916: learn: 0.2157209 total: 4.1s remaining: 371ms 917: learn: 0.2155155 total: 4.11s remaining: 367ms 918: learn: 0.2154142 total: 4.11s remaining: 362ms 919: learn: 0.2151079 total: 4.12s remaining: 358ms 920: learn: 0.2149705 total: 4.12s remaining: 353ms 921: learn: 0.2149322 total: 4.12s remaining: 349ms 922: learn: 0.2148807 total: 4.13s remaining: 344ms 923: learn: 0.2146427 total: 4.13s remaining: 340ms 924: learn: 0.2145465 total: 4.14s remaining: 335ms 925: learn: 0.2143896 total: 4.14s remaining: 331ms 926: learn: 0.2141042 total: 4.15s 
remaining: 327ms 927: learn: 0.2140653 total: 4.15s remaining: 322ms 928: learn: 0.2138073 total: 4.16s remaining: 318ms 929: learn: 0.2135790 total: 4.16s remaining: 313ms 930: learn: 0.2135487 total: 4.16s remaining: 309ms 931: learn: 0.2134147 total: 4.17s remaining: 304ms 932: learn: 0.2132543 total: 4.17s remaining: 300ms 933: learn: 0.2131319 total: 4.18s remaining: 295ms 934: learn: 0.2128832 total: 4.18s remaining: 291ms 935: learn: 0.2125868 total: 4.18s remaining: 286ms 936: learn: 0.2124035 total: 4.19s remaining: 282ms 937: learn: 0.2121759 total: 4.19s remaining: 277ms 938: learn: 0.2120445 total: 4.2s remaining: 273ms 939: learn: 0.2118077 total: 4.2s remaining: 268ms 940: learn: 0.2114668 total: 4.21s remaining: 264ms 941: learn: 0.2111545 total: 4.21s remaining: 259ms 942: learn: 0.2111246 total: 4.21s remaining: 255ms 943: learn: 0.2109490 total: 4.22s remaining: 250ms 944: learn: 0.2107927 total: 4.22s remaining: 246ms 945: learn: 0.2105585 total: 4.23s remaining: 241ms 946: learn: 0.2105239 total: 4.23s remaining: 237ms 947: learn: 0.2104112 total: 4.24s remaining: 232ms 948: learn: 0.2101941 total: 4.24s remaining: 228ms 949: learn: 0.2101563 total: 4.24s remaining: 223ms 950: learn: 0.2101322 total: 4.25s remaining: 219ms 951: learn: 0.2098230 total: 4.25s remaining: 214ms 952: learn: 0.2097301 total: 4.26s remaining: 210ms 953: learn: 0.2095164 total: 4.26s remaining: 206ms 954: learn: 0.2093993 total: 4.27s remaining: 201ms 955: learn: 0.2091224 total: 4.27s remaining: 197ms 956: learn: 0.2089994 total: 4.28s remaining: 192ms 957: learn: 0.2088083 total: 4.28s remaining: 188ms 958: learn: 0.2087013 total: 4.29s remaining: 183ms 959: learn: 0.2084003 total: 4.29s remaining: 179ms 960: learn: 0.2082851 total: 4.29s remaining: 174ms 961: learn: 0.2080820 total: 4.3s remaining: 170ms 962: learn: 0.2078811 total: 4.3s remaining: 165ms 963: learn: 0.2076056 total: 4.31s remaining: 161ms 964: learn: 0.2074108 total: 4.31s remaining: 156ms 965: 
learn: 0.2070406 total: 4.32s remaining: 152ms 966: learn: 0.2067699 total: 4.32s remaining: 148ms 967: learn: 0.2063896 total: 4.33s remaining: 143ms 968: learn: 0.2060907 total: 4.33s remaining: 139ms 969: learn: 0.2059021 total: 4.33s remaining: 134ms 970: learn: 0.2056345 total: 4.34s remaining: 130ms 971: learn: 0.2053408 total: 4.34s remaining: 125ms 972: learn: 0.2050676 total: 4.35s remaining: 121ms 973: learn: 0.2048304 total: 4.35s remaining: 116ms 974: learn: 0.2046749 total: 4.36s remaining: 112ms 975: learn: 0.2044718 total: 4.36s remaining: 107ms 976: learn: 0.2041876 total: 4.37s remaining: 103ms 977: learn: 0.2039090 total: 4.37s remaining: 98.3ms 978: learn: 0.2037805 total: 4.38s remaining: 93.9ms 979: learn: 0.2034705 total: 4.38s remaining: 89.4ms 980: learn: 0.2032025 total: 4.38s remaining: 84.9ms 981: learn: 0.2030209 total: 4.39s remaining: 80.4ms 982: learn: 0.2027762 total: 4.39s remaining: 76ms 983: learn: 0.2026031 total: 4.4s remaining: 71.5ms 984: learn: 0.2023929 total: 4.4s remaining: 67ms 985: learn: 0.2021011 total: 4.41s remaining: 62.6ms 986: learn: 0.2018960 total: 4.41s remaining: 58.1ms 987: learn: 0.2017884 total: 4.41s remaining: 53.6ms 988: learn: 0.2014352 total: 4.42s remaining: 49.1ms 989: learn: 0.2012731 total: 4.42s remaining: 44.7ms 990: learn: 0.2010667 total: 4.43s remaining: 40.2ms 991: learn: 0.2008429 total: 4.43s remaining: 35.7ms 992: learn: 0.2006669 total: 4.43s remaining: 31.3ms 993: learn: 0.2005137 total: 4.44s remaining: 26.8ms 994: learn: 0.2001325 total: 4.44s remaining: 22.3ms 995: learn: 0.1999855 total: 4.45s remaining: 17.9ms 996: learn: 0.1996729 total: 4.45s remaining: 13.4ms 997: learn: 0.1994904 total: 4.46s remaining: 8.93ms 998: learn: 0.1993044 total: 4.46s remaining: 4.47ms 999: learn: 0.1989547 total: 4.47s remaining: 0us
# Display the cross-model comparison table (MSE / MAE / R2 / training time per model).
df_compare
| Model | Mean Squared Error | Mean Absolute Error | R2 Score | Training Time (s) | |
|---|---|---|---|---|---|
| 0 | Ridge Regression | 0.746614 | 0.598242 | 0.98175 | 0.002745 |
| 1 | Linear Regression | 0.77558 | 0.613905 | 0.981042 | 0.015371 |
| 2 | Lasso Regression | 1.066347 | 0.782536 | 0.973934 | 0.104589 |
| 3 | Elastic Net | 1.070319 | 0.718474 | 0.973837 | 0.108071 |
| 4 | Gradient Boosting | 87.489334 | 7.501632 | -1.138585 | 2.444588 |
| 5 | AdaBoost | 88.43475 | 7.561282 | -1.161695 | 0.786036 |
| 6 | Random Forest | 90.124875 | 7.670387 | -1.203008 | 4.302596 |
| 7 | Decision Tree | 91.634349 | 7.746372 | -1.239906 | 0.079427 |
| 11 | CatBoost | 93.180853 | 7.853729 | -1.277709 | 4.704044 |
| 8 | XGBoost | 94.682514 | 7.893578 | -1.314415 | 0.665853 |
| 9 | K-Neighbors Regressor | 330.959365 | 16.750109 | -7.089956 | 0.000767 |
| 10 | SVR | 349.552891 | 17.672316 | -7.544455 | 0.150409 |
We trained a variety of regression models to predict stock prices: Linear Regression, Ridge Regression, Lasso Regression, Elastic Net, Support Vector Regression (SVR), K-Neighbors Regressor, Decision Tree, Random Forest, Gradient Boosting, AdaBoost, XGBoost, and CatBoost. The comparison table above reports four evaluation measures for each model — Mean Squared Error, Mean Absolute Error, R2 score, and training time in seconds. Let's break down what each metric means and its significance:
# Fit an ordinary least-squares baseline on the standardized feature matrix.
lr_model_base = LinearRegression()
lr_model_base.fit(X_train_scaled, y_train)

# Score the held-out (scaled) test matrix with the fitted baseline.
lr_pred_base = lr_model_base.predict(X_test_scaled)

# Assemble actuals vs. predictions for the test window (years >= 2020),
# keeping the dates so later cells can plot the series over time.
prediction_df = pd.DataFrame()
prediction_df['date'] = df.loc[df.date.dt.year >= 2020, 'date']
prediction_df['y_test'] = y_test
prediction_df['lr_pred_base'] = lr_pred_base
prediction_df.head()
| date | y_test | lr_pred_base | |
|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.157799 |
| 1730 | 2020-01-03 | 54.150002 | 54.553547 |
| 1731 | 2020-01-06 | 53.919998 | 54.336899 |
| 1732 | 2020-01-07 | 54.049999 | 53.907121 |
| 1733 | 2020-01-08 | 54.189999 | 54.192608 |
# Evaluate the baseline model; prints MSE/RMSE/MAE/R2 and returns them as a dict.
lr_score_base = evaluate_regression_model(y_test, lr_pred_base)
Mean Squared Error (MSE): 0.776 Root Mean Squared Error (RMSE): 0.881 Mean Absolute Error (MAE): 0.614 R-squared (R2): 0.981
# Inspect the baseline metrics dict.
lr_score_base
{'MSE': 0.7755799343709945,
'RMSE': 0.880670162076015,
'MAE': 0.6139047642970673,
'R2': 0.9810417591179742}
# Visualize predicted vs. actual values for the baseline model.
plot_regression_accuracy(y_test, lr_pred_base)
plot_predictions(df, lr_pred_base)
# Rank features by the baseline model's importance (top 20) and preview the top 15.
lr_base_feature_importance = plot_feature_importance(lr_model_base, X_train, 20)
lr_base_feature_importance[:15]
| Feature | Importance | |
|---|---|---|
| 0 | adj close_10d_avg | 59.927343 |
| 1 | close_10d_avg | 52.842967 |
| 2 | close_15d_avg | 32.217386 |
| 3 | ema_9 | 31.326586 |
| 4 | adj close_15d_avg | 29.982687 |
| 5 | close_5d_avg | 17.929065 |
| 6 | sma_5 | 13.495880 |
| 7 | adj close_5d_avg | 12.461717 |
| 8 | adj close_3d_avg | 10.395039 |
| 9 | close_1d_ago | 9.861051 |
| 10 | adj close_7d_ago | 9.733592 |
| 11 | adj close_1d_ago | 9.044414 |
| 12 | close_7d_ago | 7.605479 |
| 13 | sma_15 | 6.911237 |
| 14 | adj close_14d_ago | 5.986732 |
# Keep only the 20 features ranked most important by the baseline linear model.
keep_cols20 = lr_base_feature_importance[:20]['Feature'].tolist()
X_train20 = X_train[keep_cols20]
X_test20 = X_test[keep_cols20]
# Re-fit the scaler on the reduced train set only (no test-set leakage).
scaler = StandardScaler()
X_train_scaled20 = scaler.fit_transform(X_train20)
X_test_scaled20 = scaler.transform(X_test20)
# Train the linear regression model on the 20-feature subset
lr_model20 = LinearRegression()
lr_model20.fit(X_train_scaled20, y_train)
# Make predictions on the scaled test set and score them
lr_pred20 = lr_model20.predict(X_test_scaled20)
lr_score20 = evaluate_regression_model(y_test, lr_pred20)
Mean Squared Error (MSE): 0.768 Root Mean Squared Error (RMSE): 0.877 Mean Absolute Error (MAE): 0.613 R-squared (R2): 0.981
# Record the 20-feature predictions alongside the baseline for comparison.
prediction_df['lr_pred20'] = lr_pred20
prediction_df.head()
| date | y_test | lr_pred_base | lr_pred20 | |
|---|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.157799 | 54.239022 |
| 1730 | 2020-01-03 | 54.150002 | 54.553547 | 54.520826 |
| 1731 | 2020-01-06 | 53.919998 | 54.336899 | 54.065422 |
| 1732 | 2020-01-07 | 54.049999 | 53.907121 | 54.067429 |
| 1733 | 2020-01-08 | 54.189999 | 54.192608 | 53.987340 |
# Inspect the 20-feature model's metrics dict.
lr_score20
{'MSE': 0.7682885573217368,
'RMSE': 0.8765207112908039,
'MAE': 0.6133462584657988,
'R2': 0.9812199892092073}
# Re-rank importance within the 20-feature model.
plot_feature_importance(lr_model20, X_train20, 20)
| Feature | Importance | |
|---|---|---|
| 0 | adj close_10d_avg | 39.701594 |
| 1 | close_10d_avg | 34.492844 |
| 2 | ema_9 | 16.380613 |
| 3 | adj close_7d_avg | 14.543602 |
| 4 | sma_5 | 11.032622 |
| 5 | close_15d_avg | 10.687774 |
| 6 | sma_15 | 9.954755 |
| 7 | adj close_5d_avg | 8.830203 |
| 8 | adj close_7d_ago | 8.207036 |
| 9 | adj close_15d_avg | 7.303476 |
| 10 | close_7d_ago | 7.068989 |
| 11 | adj close_1d_ago | 6.587839 |
| 12 | close_5d_avg | 6.427914 |
| 13 | close_1d_ago | 4.837201 |
| 14 | adj close_3d_avg | 4.708264 |
| 15 | close_3d_ago | 4.101885 |
| 16 | adj close_3d_ago | 3.394063 |
| 17 | low_5d_avg | 1.134743 |
| 18 | low_10d_avg | 0.507845 |
| 19 | adj close_14d_ago | 0.220884 |
# Repeat the subset experiment with the top 15 baseline features.
keep_cols15 = lr_base_feature_importance[:15]['Feature'].tolist()
X_train15 = X_train[keep_cols15]
X_test15 = X_test[keep_cols15]
# Re-fit the scaler on the reduced train set only (no test-set leakage).
scaler = StandardScaler()
X_train_scaled15 = scaler.fit_transform(X_train15)
X_test_scaled15 = scaler.transform(X_test15)
# Train the linear regression model on the 15-feature subset
lr_model15 = LinearRegression()
lr_model15.fit(X_train_scaled15, y_train)
# Make predictions on the scaled test set and score them
lr_pred15 = lr_model15.predict(X_test_scaled15)
lr_score15 = evaluate_regression_model(y_test, lr_pred15)
Mean Squared Error (MSE): 0.797 Root Mean Squared Error (RMSE): 0.893 Mean Absolute Error (MAE): 0.626 R-squared (R2): 0.981
# Record the 15-feature predictions alongside the earlier runs.
prediction_df['lr_pred15'] = lr_pred15
prediction_df.head()
| date | y_test | lr_pred_base | lr_pred20 | lr_pred15 | |
|---|---|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.157799 | 54.239022 | 54.554907 |
| 1730 | 2020-01-03 | 54.150002 | 54.553547 | 54.520826 | 54.558027 |
| 1731 | 2020-01-06 | 53.919998 | 54.336899 | 54.065422 | 54.148986 |
| 1732 | 2020-01-07 | 54.049999 | 53.907121 | 54.067429 | 53.903359 |
| 1733 | 2020-01-08 | 54.189999 | 54.192608 | 53.987340 | 53.942897 |
# Inspect the 15-feature model's metrics dict.
lr_score15
{'MSE': 0.7968925999367115,
'RMSE': 0.8926884114497686,
'MAE': 0.6261544545364333,
'R2': 0.9805207932836008}
# Re-rank importance within the 15-feature model.
plot_feature_importance(lr_model15, X_train15, 15)
| Feature | Importance | |
|---|---|---|
| 0 | close_10d_avg | 29.471317 |
| 1 | adj close_10d_avg | 24.455782 |
| 2 | ema_9 | 21.480216 |
| 3 | adj close_5d_avg | 16.328249 |
| 4 | adj close_1d_ago | 10.320838 |
| 5 | adj close_3d_avg | 9.162211 |
| 6 | close_1d_ago | 7.864891 |
| 7 | adj close_7d_ago | 7.162200 |
| 8 | sma_15 | 6.734176 |
| 9 | close_7d_ago | 6.356886 |
| 10 | close_5d_avg | 4.707638 |
| 11 | close_15d_avg | 3.343602 |
| 12 | sma_5 | 2.379653 |
| 13 | adj close_15d_avg | 2.216233 |
| 14 | adj close_14d_ago | 0.415949 |
# Repeat the subset experiment with the top 10 baseline features.
keep_cols10 = lr_base_feature_importance[:10]['Feature'].tolist()
X_train10 = X_train[keep_cols10]
X_test10 = X_test[keep_cols10]
# Re-fit the scaler on the reduced train set only (no test-set leakage).
scaler = StandardScaler()
X_train_scaled10 = scaler.fit_transform(X_train10)
X_test_scaled10 = scaler.transform(X_test10)
# Train the linear regression model on the 10-feature subset
lr_model10 = LinearRegression()
lr_model10.fit(X_train_scaled10, y_train)
# Make predictions on the scaled test set and score them
lr_pred10 = lr_model10.predict(X_test_scaled10)
lr_score10 = evaluate_regression_model(y_test, lr_pred10)
Mean Squared Error (MSE): 0.786 Root Mean Squared Error (RMSE): 0.887 Mean Absolute Error (MAE): 0.626 R-squared (R2): 0.981
# Record the 10-feature predictions alongside the earlier runs.
prediction_df['lr_pred10'] = lr_pred10
prediction_df.head()
| date | y_test | lr_pred_base | lr_pred20 | lr_pred15 | lr_pred10 | |
|---|---|---|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.157799 | 54.239022 | 54.554907 | 54.590333 |
| 1730 | 2020-01-03 | 54.150002 | 54.553547 | 54.520826 | 54.558027 | 54.511431 |
| 1731 | 2020-01-06 | 53.919998 | 54.336899 | 54.065422 | 54.148986 | 54.126188 |
| 1732 | 2020-01-07 | 54.049999 | 53.907121 | 54.067429 | 53.903359 | 53.992480 |
| 1733 | 2020-01-08 | 54.189999 | 54.192608 | 53.987340 | 53.942897 | 54.081508 |
# Inspect the 10-feature model's metrics dict.
lr_score10
{'MSE': 0.7864993593280865,
'RMSE': 0.8868479911056271,
'MAE': 0.6260216655405225,
'R2': 0.9807748451875646}
# Re-rank importance within the 10-feature model.
plot_feature_importance(lr_model10, X_train10, 10)
| Feature | Importance | |
|---|---|---|
| 0 | ema_9 | 16.842727 |
| 1 | close_5d_avg | 14.158982 |
| 2 | adj close_3d_avg | 8.615665 |
| 3 | adj close_5d_avg | 6.493537 |
| 4 | close_15d_avg | 6.467632 |
| 5 | close_10d_avg | 6.069412 |
| 6 | sma_5 | 3.130954 |
| 7 | adj close_10d_avg | 1.666197 |
| 8 | close_1d_ago | 0.905094 |
| 9 | adj close_15d_avg | 0.029654 |
# Ridge regression with alpha tuned by cross-validated grid search.
ridge_model = Ridge()
# Define the hyperparameter grid to search
param_grid = {'alpha': [0.001, 0.01, 0.1, 1, 10, 100]}
# Perform GridSearchCV for hyperparameter tuning (5-fold CV, minimizing MSE)
grid_search = GridSearchCV(estimator=ridge_model, param_grid=param_grid, scoring='neg_mean_squared_error', cv=5)
grid_search.fit(X_train_scaled, y_train)
# Get the best model (refit on the full training set by GridSearchCV)
best_ridge_model = grid_search.best_estimator_
# Make predictions on the test set
ridge_pred_base = best_ridge_model.predict(X_test_scaled)
# Evaluate the best model
mse = mean_squared_error(y_test, ridge_pred_base)
# FIX: mean_squared_error(..., squared=False) was deprecated in scikit-learn 1.4
# and removed in 1.6; computing the square root explicitly is version-proof
# and numerically identical.
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, ridge_pred_base)
r2 = r2_score(y_test, ridge_pred_base)
print("Best Ridge Regression Model:")
print(f"Best alpha: {best_ridge_model.alpha}")
print(f'Root Mean Squared Error (RMSE): {np.round(rmse,3)}')
print(f"Mean Squared Error: {np.round(mse,3)}")
print(f"Mean Absolute Error: {np.round(mae,3)}")
print(f"R2 Score: {np.round(r2,3)}")
# Collect the metrics in the same dict layout evaluate_regression_model returns,
# so the comparison cells can treat all models uniformly.
ridge_score = {
    'MSE': mse,
    'RMSE': rmse,
    'MAE': mae,
    'R2': r2
}
Best Ridge Regression Model: Best alpha: 0.001 Root Mean Squared Error (RMSE): 0.871 Mean Squared Error: 0.759 Mean Absolute Error: 0.606 R2 Score: 0.981
# Rank features by the tuned ridge model's importance (top 20) and display them.
ridge_base_feature_importance = plot_feature_importance(best_ridge_model, X_train, 20)
ridge_base_feature_importance[:20]
| Feature | Importance | |
|---|---|---|
| 0 | close_5d_avg | 19.627386 |
| 1 | ema_9 | 18.897025 |
| 2 | sma_5 | 18.403314 |
| 3 | adj close_5d_avg | 11.844801 |
| 4 | close_10d_avg | 9.872194 |
| 5 | adj close_10d_avg | 9.389002 |
| 6 | close_15d_avg | 7.887685 |
| 7 | close_1d_ago | 7.038813 |
| 8 | close_7d_avg | 6.958126 |
| 9 | adj close_1d_ago | 6.600528 |
| 10 | close_5d_ago | 6.008206 |
| 11 | sma_15 | 5.538994 |
| 12 | low_5d_avg | 4.965283 |
| 13 | low_10d_avg | 4.840114 |
| 14 | open_5d_avg | 4.446916 |
| 15 | high_5d_avg | 3.714418 |
| 16 | open_10d_avg | 3.486739 |
| 17 | high_30d_avg | 2.896473 |
| 18 | close_3d_ago | 2.893431 |
| 19 | open_15d_avg | 2.892245 |
# Record the tuned ridge predictions alongside the linear-regression runs.
prediction_df['ridge_pred_base'] = ridge_pred_base
prediction_df.head()
| date | y_test | lr_pred_base | lr_pred20 | lr_pred15 | lr_pred10 | ridge_pred_base | |
|---|---|---|---|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.157799 | 54.239022 | 54.554907 | 54.590333 | 54.205840 |
| 1730 | 2020-01-03 | 54.150002 | 54.553547 | 54.520826 | 54.558027 | 54.511431 | 54.542331 |
| 1731 | 2020-01-06 | 53.919998 | 54.336899 | 54.065422 | 54.148986 | 54.126188 | 54.345656 |
| 1732 | 2020-01-07 | 54.049999 | 53.907121 | 54.067429 | 53.903359 | 53.992480 | 53.873857 |
| 1733 | 2020-01-08 | 54.189999 | 54.192608 | 53.987340 | 53.942897 | 54.081508 | 54.195624 |
# Keep only the 20 features ranked most important by the tuned ridge model
# (note: this reuses/overwrites keep_cols20 and the *20 matrices from the LR runs).
keep_cols20 = ridge_base_feature_importance[:20]['Feature'].tolist()
X_train20 = X_train[keep_cols20]
X_test20 = X_test[keep_cols20]
# Re-fit the scaler on the reduced train set only (no test-set leakage).
scaler = StandardScaler()
X_train_scaled20 = scaler.fit_transform(X_train20)
X_test_scaled20 = scaler.transform(X_test20)
# Train model with alpha=0.001, the best value found by the grid search above
ridge_model20 = Ridge(alpha=0.001)
ridge_model20.fit(X_train_scaled20, y_train)
# Make predictions on the scaled test set and score them
ridge_pred20 = ridge_model20.predict(X_test_scaled20)
ridge_score20 = evaluate_regression_model(y_test, ridge_pred20)
Mean Squared Error (MSE): 0.73 Root Mean Squared Error (RMSE): 0.854 Mean Absolute Error (MAE): 0.586 R-squared (R2): 0.982
# Re-rank importance within the 20-feature ridge model.
plot_feature_importance(ridge_model20, X_train20, 20)
| Feature | Importance | |
|---|---|---|
| 0 | sma_5 | 25.441510 |
| 1 | close_5d_avg | 22.435160 |
| 2 | adj close_5d_avg | 11.490239 |
| 3 | adj close_1d_ago | 5.671591 |
| 4 | adj close_10d_avg | 5.507894 |
| 5 | close_5d_ago | 5.147037 |
| 6 | close_1d_ago | 4.554521 |
| 7 | close_10d_avg | 4.509604 |
| 8 | close_15d_avg | 3.719596 |
| 9 | low_5d_avg | 3.022988 |
| 10 | sma_15 | 2.915010 |
| 11 | ema_9 | 2.899160 |
| 12 | low_10d_avg | 1.985008 |
| 13 | high_5d_avg | 1.745675 |
| 14 | open_10d_avg | 1.590243 |
| 15 | open_5d_avg | 1.561036 |
| 16 | open_15d_avg | 1.264441 |
| 17 | close_3d_ago | 0.798379 |
| 18 | close_7d_avg | 0.640924 |
| 19 | high_30d_avg | 0.200250 |
# Add the top-20-feature Ridge predictions to the comparison frame.
prediction_df['ridge_pred20'] = ridge_pred20
prediction_df.head()
| date | y_test | lr_pred_base | lr_pred20 | lr_pred15 | lr_pred10 | ridge_pred_base | ridge_pred20 | |
|---|---|---|---|---|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.157799 | 54.239022 | 54.554907 | 54.590333 | 54.205840 | 54.349344 |
| 1730 | 2020-01-03 | 54.150002 | 54.553547 | 54.520826 | 54.558027 | 54.511431 | 54.542331 | 54.381596 |
| 1731 | 2020-01-06 | 53.919998 | 54.336899 | 54.065422 | 54.148986 | 54.126188 | 54.345656 | 54.233187 |
| 1732 | 2020-01-07 | 54.049999 | 53.907121 | 54.067429 | 53.903359 | 53.992480 | 53.873857 | 54.036503 |
| 1733 | 2020-01-08 | 54.189999 | 54.192608 | 53.987340 | 53.942897 | 54.081508 | 54.195624 | 54.059649 |
# Lasso regression with the regularization strength tuned by cross-validation.
lasso_model = Lasso()
param_grid = {'alpha': [0.001, 0.01, 0.1, 1, 10, 100]}
# Perform GridSearchCV for hyperparameter tuning (5-fold CV, MSE criterion)
grid_search = GridSearchCV(estimator=lasso_model, param_grid=param_grid, scoring='neg_mean_squared_error', cv=5)
grid_search.fit(X_train_scaled, y_train)
# Get the best model
best_lasso_model = grid_search.best_estimator_
# Make predictions on the test set
lasso_pred_base = best_lasso_model.predict(X_test_scaled)
# Evaluate the best model
mse = mean_squared_error(y_test, lasso_pred_base)
# np.sqrt(mse) rather than mean_squared_error(..., squared=False): the
# `squared` keyword is deprecated and was removed in scikit-learn 1.6.
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, lasso_pred_base)
r2 = r2_score(y_test, lasso_pred_base)
print("Best Lasso Regression Model:")
print(f"Best alpha: {best_lasso_model.alpha}")
print(f'Root Mean Squared Error (RMSE): {np.round(rmse,3)}')
print(f"Mean Squared Error: {np.round(mse,3)}")
print(f"Mean Absolute Error: {np.round(mae,3)}")
print(f"R2 Score: {np.round(r2,3)}")
# Keep the metrics for the model-comparison table built later.
lasso_score = {
    'MSE': mse,
    'RMSE': rmse,
    'MAE': mae,
    'R2': r2
}
Best Lasso Regression Model: Best alpha: 0.001 Root Mean Squared Error (RMSE): 0.97 Mean Squared Error: 0.94 Mean Absolute Error: 0.663 R2 Score: 0.977
# Plot and keep the baseline Lasso feature-importance ranking for reuse below.
lasso_base_feature_importance = plot_feature_importance(best_lasso_model,X_train,20)
lasso_base_feature_importance[:20]
| Feature | Importance | |
|---|---|---|
| 0 | ema_9 | 4.177865 |
| 1 | close_3d_avg | 1.314731 |
| 2 | macd | 1.312350 |
| 3 | macd_signal | 1.160117 |
| 4 | sma_15 | 1.006266 |
| 5 | adj close_3d_avg | 0.822478 |
| 6 | low_1d_ago | 0.722646 |
| 7 | close_3d_ago | 0.583764 |
| 8 | rsi | 0.543082 |
| 9 | open_3d_ago | 0.510853 |
| 10 | sma_30 | 0.492814 |
| 11 | high_1d_ago | 0.466770 |
| 12 | adj close_3d_ago | 0.422750 |
| 13 | low_15d_avg | 0.404237 |
| 14 | open_1d_ago | 0.365671 |
| 15 | high_14d_ago | 0.307277 |
| 16 | low_30d_avg | 0.291303 |
| 17 | sma_10 | 0.255037 |
| 18 | open_30d_avg | 0.241245 |
| 19 | high_30d_avg | 0.227913 |
# Add the baseline Lasso predictions to the comparison frame.
prediction_df['lasso_pred_base'] = lasso_pred_base
prediction_df.head()
| date | y_test | lr_pred_base | lr_pred20 | lr_pred15 | lr_pred10 | ridge_pred_base | ridge_pred20 | lasso_pred_base | |
|---|---|---|---|---|---|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.157799 | 54.239022 | 54.554907 | 54.590333 | 54.205840 | 54.349344 | 54.496024 |
| 1730 | 2020-01-03 | 54.150002 | 54.553547 | 54.520826 | 54.558027 | 54.511431 | 54.542331 | 54.381596 | 54.190970 |
| 1731 | 2020-01-06 | 53.919998 | 54.336899 | 54.065422 | 54.148986 | 54.126188 | 54.345656 | 54.233187 | 53.975755 |
| 1732 | 2020-01-07 | 54.049999 | 53.907121 | 54.067429 | 53.903359 | 53.992480 | 53.873857 | 54.036503 | 53.828707 |
| 1733 | 2020-01-08 | 54.189999 | 54.192608 | 53.987340 | 53.942897 | 54.081508 | 54.195624 | 54.059649 | 54.002350 |
# Reduce both splits to the 20 features the baseline Lasso model ranked highest.
keep_cols20 = lasso_base_feature_importance.head(20)['Feature'].tolist()
X_train20, X_test20 = X_train[keep_cols20], X_test[keep_cols20]
# Fit the scaler on train only; reuse it for the test split.
feature_scaler = StandardScaler().fit(X_train20)
X_train_scaled20 = feature_scaler.transform(X_train20)
X_test_scaled20 = feature_scaler.transform(X_test20)
# Refit Lasso on the reduced feature set and score it on the held-out data.
lasso_model20 = Lasso(alpha=0.001)
lasso_model20.fit(X_train_scaled20, y_train)
lasso_pred20 = lasso_model20.predict(X_test_scaled20)
lasso_score20 = evaluate_regression_model(y_test, lasso_pred20)
Mean Squared Error (MSE): 0.948 Root Mean Squared Error (RMSE): 0.974 Mean Absolute Error (MAE): 0.665 R-squared (R2): 0.977
plot_feature_importance(lasso_model20,X_train20,20)
| Feature | Importance | |
|---|---|---|
| 0 | ema_9 | 4.017287 |
| 1 | close_3d_avg | 2.790781 |
| 2 | low_1d_ago | 1.047406 |
| 3 | close_3d_ago | 1.034968 |
| 4 | sma_30 | 1.029511 |
| 5 | low_15d_avg | 0.997798 |
| 6 | macd | 0.968169 |
| 7 | macd_signal | 0.880009 |
| 8 | high_14d_ago | 0.568316 |
| 9 | high_1d_ago | 0.533765 |
| 10 | rsi | 0.496376 |
| 11 | sma_10 | 0.423288 |
| 12 | adj close_3d_avg | 0.228550 |
| 13 | open_3d_ago | 0.204478 |
| 14 | open_1d_ago | 0.139447 |
| 15 | low_30d_avg | 0.025033 |
| 16 | sma_15 | 0.000000 |
| 17 | adj close_3d_ago | 0.000000 |
| 18 | open_30d_avg | 0.000000 |
| 19 | high_30d_avg | 0.000000 |
# Add the top-20-feature Lasso predictions to the comparison frame.
prediction_df['lasso_pred20'] = lasso_pred20
prediction_df.head()
| date | y_test | lr_pred_base | lr_pred20 | lr_pred15 | lr_pred10 | ridge_pred_base | ridge_pred20 | lasso_pred_base | lasso_pred20 | |
|---|---|---|---|---|---|---|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.157799 | 54.239022 | 54.554907 | 54.590333 | 54.205840 | 54.349344 | 54.496024 | 54.501734 |
| 1730 | 2020-01-03 | 54.150002 | 54.553547 | 54.520826 | 54.558027 | 54.511431 | 54.542331 | 54.381596 | 54.190970 | 54.262946 |
| 1731 | 2020-01-06 | 53.919998 | 54.336899 | 54.065422 | 54.148986 | 54.126188 | 54.345656 | 54.233187 | 53.975755 | 54.001497 |
| 1732 | 2020-01-07 | 54.049999 | 53.907121 | 54.067429 | 53.903359 | 53.992480 | 53.873857 | 54.036503 | 53.828707 | 53.859163 |
| 1733 | 2020-01-08 | 54.189999 | 54.192608 | 53.987340 | 53.942897 | 54.081508 | 54.195624 | 54.059649 | 54.002350 | 53.989285 |
# Elastic Net regression: tune both the strength (alpha) and the L1/L2 mix.
elastic_net_model = ElasticNet()
# Define the hyperparameter grid to search
param_grid = {
    'alpha': [0.001, 0.01, 0.1, 1, 10, 100],
    'l1_ratio': [0.1, 0.3, 0.5, 0.7, 0.9]
}
# Perform GridSearchCV for hyperparameter tuning (5-fold CV, MSE criterion)
grid_search = GridSearchCV(estimator=elastic_net_model, param_grid=param_grid, scoring='neg_mean_squared_error', cv=5)
grid_search.fit(X_train_scaled, y_train)
# Get the best model
best_elastic_net_model = grid_search.best_estimator_
# Make predictions on the test set
elastic_pred_base = best_elastic_net_model.predict(X_test_scaled)
# Evaluate the best model
mse = mean_squared_error(y_test, elastic_pred_base)
# np.sqrt(mse) rather than mean_squared_error(..., squared=False): the
# `squared` keyword is deprecated and was removed in scikit-learn 1.6.
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, elastic_pred_base)
r2 = r2_score(y_test, elastic_pred_base)
print("Best Elastic Net Model:")
print(f"Best alpha: {best_elastic_net_model.alpha}")
print(f"Best l1_ratio: {best_elastic_net_model.l1_ratio}")
print(f'Root Mean Squared Error (RMSE): {np.round(rmse,3)}')
print(f"Mean Squared Error: {np.round(mse,3)}")
print(f"Mean Absolute Error: {np.round(mae,3)}")
print(f"R2 Score: {np.round(r2,3)}")
# Keep the metrics for the model-comparison table built later.
elastic_score = {
    'MSE': mse,
    'RMSE': rmse,
    'MAE': mae,
    'R2': r2
}
Best Elastic Net Model: Best alpha: 0.001 Best l1_ratio: 0.1 Root Mean Squared Error (RMSE): 0.953 Mean Squared Error: 0.908 Mean Absolute Error: 0.653 R2 Score: 0.978
# Plot and keep the baseline Elastic Net feature-importance ranking for reuse below.
elastic_base_feature_importance = plot_feature_importance(best_elastic_net_model,X_train,20)
elastic_base_feature_importance[:20]
| Feature | Importance | |
|---|---|---|
| 0 | ema_9 | 1.613307 |
| 1 | sma_5 | 1.518359 |
| 2 | macd | 1.493389 |
| 3 | close_3d_avg | 1.290926 |
| 4 | sma_10 | 1.289912 |
| 5 | macd_signal | 1.253588 |
| 6 | adj close_3d_avg | 1.239816 |
| 7 | low_1d_ago | 0.982940 |
| 8 | sma_15 | 0.923173 |
| 9 | high_1d_ago | 0.815800 |
| 10 | close_3d_ago | 0.765736 |
| 11 | open_1d_ago | 0.631358 |
| 12 | sma_30 | 0.586684 |
| 13 | low_3d_avg | 0.580421 |
| 14 | adj close_3d_ago | 0.544949 |
| 15 | open_3d_ago | 0.531747 |
| 16 | rsi | 0.485403 |
| 17 | high_3d_avg | 0.431768 |
| 18 | close_1d_ago | 0.425664 |
| 19 | low_15d_avg | 0.399066 |
# Add the baseline Elastic Net predictions to the comparison frame.
prediction_df['elastic_pred_base'] = elastic_pred_base
prediction_df.head()
| date | y_test | lr_pred_base | lr_pred20 | lr_pred15 | lr_pred10 | ridge_pred_base | ridge_pred20 | lasso_pred_base | lasso_pred20 | elastic_pred_base | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.157799 | 54.239022 | 54.554907 | 54.590333 | 54.205840 | 54.349344 | 54.496024 | 54.501734 | 54.444972 |
| 1730 | 2020-01-03 | 54.150002 | 54.553547 | 54.520826 | 54.558027 | 54.511431 | 54.542331 | 54.381596 | 54.190970 | 54.262946 | 54.198628 |
| 1731 | 2020-01-06 | 53.919998 | 54.336899 | 54.065422 | 54.148986 | 54.126188 | 54.345656 | 54.233187 | 53.975755 | 54.001497 | 54.011290 |
| 1732 | 2020-01-07 | 54.049999 | 53.907121 | 54.067429 | 53.903359 | 53.992480 | 53.873857 | 54.036503 | 53.828707 | 53.859163 | 53.818335 |
| 1733 | 2020-01-08 | 54.189999 | 54.192608 | 53.987340 | 53.942897 | 54.081508 | 54.195624 | 54.059649 | 54.002350 | 53.989285 | 54.037049 |
# Reduce both splits to the 20 features the baseline Elastic Net ranked highest.
keep_cols20 = elastic_base_feature_importance[:20]['Feature'].tolist()
X_train20 = X_train[keep_cols20]
X_test20 = X_test[keep_cols20]
scaler = StandardScaler()
X_train_scaled20 = scaler.fit_transform(X_train20)
X_test_scaled20 = scaler.transform(X_test20)
# Train model with the hyperparameters the grid search above selected
# (alpha=0.001, l1_ratio=0.1). The previous version hard-coded l1_ratio=0.9,
# which contradicted the tuned best_elastic_net_model.
elastic_model20 = ElasticNet(alpha=0.001, l1_ratio=0.1)
elastic_model20.fit(X_train_scaled20, y_train)
# Make predictions on the scaled test set
elastic_pred20 = elastic_model20.predict(X_test_scaled20)
elastic_score20 = evaluate_regression_model(y_test, elastic_pred20)
Mean Squared Error (MSE): 0.948 Root Mean Squared Error (RMSE): 0.974 Mean Absolute Error (MAE): 0.667 R-squared (R2): 0.977
plot_feature_importance(elastic_model20,X_train20,20)
| Feature | Importance | |
|---|---|---|
| 0 | ema_9 | 3.518012 |
| 1 | close_3d_avg | 2.975563 |
| 2 | low_15d_avg | 1.164547 |
| 3 | sma_30 | 1.008792 |
| 4 | low_1d_ago | 0.912273 |
| 5 | high_1d_ago | 0.840268 |
| 6 | sma_10 | 0.826143 |
| 7 | adj close_3d_avg | 0.783827 |
| 8 | macd_signal | 0.701467 |
| 9 | macd | 0.690383 |
| 10 | sma_15 | 0.565839 |
| 11 | adj close_3d_ago | 0.556647 |
| 12 | rsi | 0.506954 |
| 13 | close_3d_ago | 0.408292 |
| 14 | open_3d_ago | 0.238611 |
| 15 | high_3d_avg | 0.039417 |
| 16 | low_3d_avg | 0.000000 |
| 17 | sma_5 | 0.000000 |
| 18 | close_1d_ago | 0.000000 |
| 19 | open_1d_ago | 0.000000 |
# Add the top-20-feature Elastic Net predictions to the comparison frame.
prediction_df['elastic_pred20'] = elastic_pred20
prediction_df.head()
| date | y_test | lr_pred_base | lr_pred20 | lr_pred15 | lr_pred10 | ridge_pred_base | ridge_pred20 | lasso_pred_base | lasso_pred20 | elastic_pred_base | elastic_pred20 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.157799 | 54.239022 | 54.554907 | 54.590333 | 54.205840 | 54.349344 | 54.496024 | 54.501734 | 54.444972 | 54.503795 |
| 1730 | 2020-01-03 | 54.150002 | 54.553547 | 54.520826 | 54.558027 | 54.511431 | 54.542331 | 54.381596 | 54.190970 | 54.262946 | 54.198628 | 54.263269 |
| 1731 | 2020-01-06 | 53.919998 | 54.336899 | 54.065422 | 54.148986 | 54.126188 | 54.345656 | 54.233187 | 53.975755 | 54.001497 | 54.011290 | 54.055709 |
| 1732 | 2020-01-07 | 54.049999 | 53.907121 | 54.067429 | 53.903359 | 53.992480 | 53.873857 | 54.036503 | 53.828707 | 53.859163 | 53.818335 | 53.920259 |
| 1733 | 2020-01-08 | 54.189999 | 54.192608 | 53.987340 | 53.942897 | 54.081508 | 54.195624 | 54.059649 | 54.002350 | 53.989285 | 54.037049 | 54.058664 |
def _score_to_df(score, model_name):
    """Turn a {metric: value} score dict into a one-row DataFrame tagged with the model name.

    Replaces ten copy-pasted 4-line stanzas that each built the frame from
    [keys, values] and promoted row 0 to the header.
    """
    row = pd.DataFrame([list(score.values())], columns=list(score.keys()))
    row['Model'] = model_name
    return row

ela_df = _score_to_df(elastic_score, 'Elastic_Net with All Features')
ela_20_df = _score_to_df(elastic_score20, 'Elastic_Net with Top 20 Features')
lasso_df = _score_to_df(lasso_score, 'Lasso with All Features')
lasso_20_df = _score_to_df(lasso_score20, 'Lasso with Top 20 Features')
ridge_df = _score_to_df(ridge_score, 'Ridge with All Features')
ridge_20_df = _score_to_df(ridge_score20, 'Ridge with Top 20 Features')
lr_base_df = _score_to_df(lr_score_base, 'Linear Reg. with All Features')
lr_20_df = _score_to_df(lr_score20, 'Linear Reg. with Top 20 Features')
lr_15_df = _score_to_df(lr_score15, 'Linear Reg. with Top 15 Features')
lr_10_df = _score_to_df(lr_score10, 'Linear Reg. with Top 10 Features')
# Rank all fitted models, best R2 first.
df_compare = pd.concat([ela_df, lasso_df, ridge_df, ela_20_df, lasso_20_df, ridge_20_df,
                        lr_base_df, lr_20_df, lr_15_df, lr_10_df]).sort_values(by=['R2'], ascending=False).reset_index(drop=True)
df_compare
| MSE | RMSE | MAE | R2 | Model | |
|---|---|---|---|---|---|
| 0 | 0.729511 | 0.854114 | 0.585601 | 0.982168 | Ridge with Top 20 Features |
| 1 | 0.758611 | 0.870983 | 0.605987 | 0.981457 | Ridge with All Features |
| 2 | 0.768289 | 0.876521 | 0.613346 | 0.98122 | Linear Reg. with Top 20 Features |
| 3 | 0.77558 | 0.88067 | 0.613905 | 0.981042 | Linear Reg. with All Features |
| 4 | 0.786499 | 0.886848 | 0.626022 | 0.980775 | Linear Reg. with Top 10 Features |
| 5 | 0.796893 | 0.892688 | 0.626154 | 0.980521 | Linear Reg. with Top 15 Features |
| 6 | 0.90796 | 0.952869 | 0.652726 | 0.977806 | Elastic_Net with All Features |
| 7 | 0.940304 | 0.969693 | 0.663218 | 0.977015 | Lasso with All Features |
| 8 | 0.948047 | 0.973677 | 0.664517 | 0.976826 | Lasso with Top 20 Features |
| 9 | 0.948242 | 0.973777 | 0.666597 | 0.976821 | Elastic_Net with Top 20 Features |
After retraining the models with different alpha values and input features, the Ridge regression model with alpha 0.001 and the top 20 features performed best among the others.
MSE measures the average squared difference between predicted and actual values. In this case, the MSE of 0.729511 is relatively low, indicating that, on average, the squared errors between predicted and actual values are small. Lower MSE values suggest better accuracy.
RMSE is the square root of the MSE and provides a measure of the average magnitude of the errors. A lower RMSE (0.854114) signifies that, on average, the model's predictions are close to the actual values. It is in the same unit as the target variable.
MAE measures the average absolute difference between predicted and actual values. With an MAE of 0.585601, the model's predictions, on average, deviate by approximately 0.59 units from the actual values. Lower MAE values indicate better accuracy.
R2 represents the proportion of variance in the target variable that is predictable from the independent variables. An R2 value of 0.982168 is exceptionally high, indicating that the model explains about 98.21% of the variance in the closing stock prices. A higher R2 value suggests a better accuracy.
In summary, the provided accuracy scores collectively suggest that the model performs exceptionally well. The low MSE, RMSE, MAE and high R2 score indicate that the model's predictions are close to the actual values.
prediction_df
| date | y_test | lr_pred_base | lr_pred20 | lr_pred15 | lr_pred10 | ridge_pred_base | ridge_pred20 | lasso_pred_base | lasso_pred20 | elastic_pred_base | elastic_pred20 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.157799 | 54.239022 | 54.554907 | 54.590333 | 54.205840 | 54.349344 | 54.496024 | 54.501734 | 54.444972 | 54.503795 |
| 1730 | 2020-01-03 | 54.150002 | 54.553547 | 54.520826 | 54.558027 | 54.511431 | 54.542331 | 54.381596 | 54.190970 | 54.262946 | 54.198628 | 54.263269 |
| 1731 | 2020-01-06 | 53.919998 | 54.336899 | 54.065422 | 54.148986 | 54.126188 | 54.345656 | 54.233187 | 53.975755 | 54.001497 | 54.011290 | 54.055709 |
| 1732 | 2020-01-07 | 54.049999 | 53.907121 | 54.067429 | 53.903359 | 53.992480 | 53.873857 | 54.036503 | 53.828707 | 53.859163 | 53.818335 | 53.920259 |
| 1733 | 2020-01-08 | 54.189999 | 54.192608 | 53.987340 | 53.942897 | 54.081508 | 54.195624 | 54.059649 | 54.002350 | 53.989285 | 54.037049 | 54.058664 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2694 | 2023-11-01 | 67.970001 | 67.251327 | 66.691627 | 66.293326 | 66.511564 | 67.298025 | 66.970336 | 66.962254 | 67.016646 | 67.047164 | 67.123766 |
| 2695 | 2023-11-02 | 68.820000 | 68.295223 | 67.635666 | 67.398789 | 67.681439 | 68.052648 | 67.865085 | 67.502858 | 67.485406 | 67.584452 | 67.672327 |
| 2696 | 2023-11-03 | 68.239998 | 68.864264 | 68.759090 | 68.689217 | 68.943305 | 68.811739 | 68.806564 | 68.155329 | 68.270610 | 68.171208 | 68.378378 |
| 2697 | 2023-11-06 | 68.489998 | 68.041446 | 68.134383 | 68.593341 | 68.842344 | 68.125227 | 68.286008 | 68.186065 | 68.384744 | 68.222377 | 68.472590 |
| 2698 | 2023-11-07 | 69.019997 | 68.239220 | 68.843554 | 69.136339 | 69.178896 | 68.336189 | 68.670691 | 68.368078 | 68.539077 | 68.463381 | 68.531877 |
970 rows × 12 columns
# Overlay the actual test series and every model's predictions in one chart.
plt.figure(figsize=(20, 10))
series_to_plot = ['y_test', 'lr_pred_base', 'lr_pred20', 'lr_pred15', 'lr_pred10',
                  'ridge_pred_base', 'ridge_pred20', 'lasso_pred_base', 'lasso_pred20',
                  'elastic_pred_base', 'elastic_pred20']
for col in series_to_plot:
    sns.lineplot(x=prediction_df.date, y=prediction_df[col], label=col)
plt.legend(prop={'size': 14, 'weight': 'bold'})
plt.title('Model Prediction Comparison', fontsize=16)
plt.ylabel('Prediction', fontsize=14)
plt.xlabel('Date', fontsize=14)
plt.show()
# Actuals vs. the four linear-regression variants only.
plt.figure(figsize=(20, 10))
for col in ['y_test', 'lr_pred_base', 'lr_pred20', 'lr_pred15', 'lr_pred10']:
    sns.lineplot(x=prediction_df.date, y=prediction_df[col], label=col)
plt.legend(prop={'size': 14, 'weight': 'bold'})
plt.title('Model Prediction Comparison', fontsize=16)
plt.ylabel('Prediction', fontsize=14)
plt.xlabel('Date', fontsize=14)
plt.show()
# Actuals vs. the two Ridge variants only.
plt.figure(figsize=(20, 10))
for col in ['y_test', 'ridge_pred_base', 'ridge_pred20']:
    sns.lineplot(x=prediction_df.date, y=prediction_df[col], label=col)
plt.legend(prop={'size': 14, 'weight': 'bold'})
plt.title('Model Prediction Comparison', fontsize=16)
plt.ylabel('Prediction', fontsize=14)
plt.xlabel('Date', fontsize=14)
plt.show()
# Actuals vs. the two Lasso variants only.
plt.figure(figsize=(20, 10))
for col in ['y_test', 'lasso_pred_base', 'lasso_pred20']:
    sns.lineplot(x=prediction_df.date, y=prediction_df[col], label=col)
plt.legend(prop={'size': 14, 'weight': 'bold'})
plt.title('Model Prediction Comparison', fontsize=16)
plt.ylabel('Prediction', fontsize=14)
plt.xlabel('Date', fontsize=14)
plt.show()
# Actuals vs. the two Elastic Net variants only.
plt.figure(figsize=(20, 10))
for col in ['y_test', 'elastic_pred_base', 'elastic_pred20']:
    sns.lineplot(x=prediction_df.date, y=prediction_df[col], label=col)
plt.legend(prop={'size': 14, 'weight': 'bold'})
plt.title('Model Prediction Comparison', fontsize=16)
plt.ylabel('Prediction', fontsize=14)
plt.xlabel('Date', fontsize=14)
plt.show()
# target column is next day's close price
y_train = train_df['close_1d_next'].copy()
# drop(columns=...) instead of the positional axis argument:
# `df.drop(labels, 1)` was deprecated in pandas 1.0 and removed in 2.0.
X_train = train_df.drop(columns=['close_1d_next'])
# target column is next day's close price
y_test = test_df['close_1d_next'].copy()
X_test = test_df.drop(columns=['close_1d_next'])
# Restrict both splits to the 20 features the baseline Ridge model ranked highest.
ridge_20_features = ridge_base_feature_importance[:20]['Feature'].tolist()
X_train = X_train[ridge_20_features]
X_test = X_test[ridge_20_features]
def train_ridge_regression(X_train, X_test, y_train, y_test):
    """Standardize the features, fit Ridge(alpha=0.001), and score it on the test split.

    Returns a (model, predictions, score) triple, where `score` is whatever
    evaluate_regression_model2 produces for the test-set predictions.
    """
    std_scaler = StandardScaler()
    train_scaled = std_scaler.fit_transform(X_train)
    test_scaled = std_scaler.transform(X_test)
    model = Ridge(alpha=0.001)
    model.fit(train_scaled, y_train)
    predictions = model.predict(test_scaled)
    score = evaluate_regression_model2(y_test, predictions)
    return model, predictions, score
# Retrain on the ridge top-20 feature set and show the resulting score dict.
ridge_model, ridge_pred, ridge_score = train_ridge_regression(X_train,X_test,y_train,y_test)
ridge_score
{'MSE': 0.7295114881722916,
'RMSE': 0.8541144467647714,
'MAE': 0.5856010765186319,
'R2': 0.9821678541358965}
ridge_pred[:15]
array([54.34934416, 54.38159609, 54.23318706, 54.03650291, 54.05964943,
54.17549586, 54.25017275, 54.54902816, 54.18781481, 54.74202716,
55.14666847, 55.18770245, 55.53205975, 55.4774224 , 55.51006433])
# Diagnostic plots for the final ridge model: accuracy (residual/scatter),
# predicted-vs-actual time series, and the feature-importance ranking.
plot_regression_accuracy(y_test, ridge_pred)
plot_predictions(df,ridge_pred)
plot_feature_importance(ridge_model,X_train,20)
| Feature | Importance | |
|---|---|---|
| 0 | sma_5 | 25.441510 |
| 1 | close_5d_avg | 22.435160 |
| 2 | adj close_5d_avg | 11.490239 |
| 3 | adj close_1d_ago | 5.671591 |
| 4 | adj close_10d_avg | 5.507894 |
| 5 | close_5d_ago | 5.147037 |
| 6 | close_1d_ago | 4.554521 |
| 7 | close_10d_avg | 4.509604 |
| 8 | close_15d_avg | 3.719596 |
| 9 | low_5d_avg | 3.022988 |
| 10 | sma_15 | 2.915010 |
| 11 | ema_9 | 2.899160 |
| 12 | low_10d_avg | 1.985008 |
| 13 | high_5d_avg | 1.745675 |
| 14 | open_10d_avg | 1.590243 |
| 15 | open_5d_avg | 1.561036 |
| 16 | open_15d_avg | 1.264441 |
| 17 | close_3d_ago | 0.798379 |
| 18 | close_7d_avg | 0.640924 |
| 19 | high_30d_avg | 0.200250 |
The residual, scatter, and time series line charts above clearly show that the predicted values are very close to the actual values. These visualizations confirm that the model is very good at making accurate predictions, highlighting its strong performance and reliability in understanding the details of the data.
def preprocess_data(df):
    """Add technical indicators, lagged prices, and rolling averages to a daily OHLCV frame.

    Expects columns 'open', 'high', 'low', 'close', 'adj close', 'volume'.
    Adds, with exactly the same column names and order as the original
    copy-pasted version:
      - ema_9 and sma_5/10/15/30 of close (shifted one day, past data only)
      - rsi, mfi, macd, macd_signal
      - 'close_1d_next': the next day's close (the prediction target)
      - lagged copies of each raw column at 1d/3d/5d/1w/2w/3w/4w
      - rolling means of open/high/low/volume/adj close over 3/5/7/10/15/30 days
    Mutates `df` in place and returns it.
    """
    # Trend indicators, shifted one day so they only see past closes.
    df['ema_9'] = df['close'].ewm(9).mean().shift()
    for window in (5, 10, 15, 30):
        df[f'sma_{window}'] = df['close'].rolling(window).mean().shift()
    df['rsi'] = rsi(df)
    df['mfi'] = mfi(df, 14)
    # MACD: difference of the 12- and 26-day EMAs, plus its 9-day signal line.
    ema_12 = df['close'].ewm(span=12, min_periods=12).mean()
    ema_26 = df['close'].ewm(span=26, min_periods=26).mean()
    df['macd'] = ema_12 - ema_26
    df['macd_signal'] = df.macd.ewm(span=9, min_periods=9).mean()
    # Prediction target: the following day's close.
    df['close_1d_next'] = df['close'].shift(-1)
    # Lagged copies of each raw column. Note the output prefix for
    # 'adj close' is 'adj_close' (underscore), matching the original names.
    lags = [(1, '1d'), (3, '3d'), (5, '5d'), (7, '1w'), (14, '2w'), (21, '3w'), (28, '4w')]
    lag_columns = [('close', 'close'), ('adj close', 'adj_close'), ('open', 'open'),
                   ('high', 'high'), ('low', 'low'), ('volume', 'volume')]
    for src, prefix in lag_columns:
        for periods, tag in lags:
            df[f'{prefix}_{tag}_ago'] = df[src].shift(periods)
    # Rolling averages over several look-back windows.
    avg_columns = [('open', 'open'), ('high', 'high'), ('low', 'low'),
                   ('volume', 'volume'), ('adj close', 'adj_close')]
    for src, prefix in avg_columns:
        for window in (3, 5, 7, 10, 15, 30):
            df[f'{prefix}_{window}d_avg'] = df[src].rolling(window=window).mean()
    return df
# Load the daily bar data; normalize column names to lower case.
df_all = pd.read_parquet(out_loc + "stock_1d.parquet")
df_all.columns = df_all.columns.str.lower()
### keep stocks in data with min year 2013, max year 2023
# One groupby with named aggregations replaces the previous three separate
# groupby passes plus two merges over the same 'date' column.
stock_cnt = (df_all.groupby('symbol')['date']
             .agg(min_date='min', max_date='max', days_cnt='count')
             .reset_index())
stock_cnt['min_year'] = stock_cnt['min_date'].dt.year
stock_cnt['max_year'] = stock_cnt['max_date'].dt.year
# A stock qualifies if its history spans 2013-2023 with >= 2500 trading days.
keep_stocks = stock_cnt[(stock_cnt['min_year']==2013)&(stock_cnt['max_year']==2023)&(stock_cnt['days_cnt']>=2500)]['symbol'].unique().tolist()
stock_cnt.head()
| symbol | min_date | max_date | days_cnt | min_year | max_year | |
|---|---|---|---|---|---|---|
| 0 | A | 2013-01-02 | 2023-11-08 | 2733 | 2013 | 2023 |
| 1 | AAL | 2013-01-02 | 2023-11-08 | 2733 | 2013 | 2023 |
| 2 | AAPL | 2013-01-02 | 2023-11-08 | 2733 | 2013 | 2023 |
| 3 | ABBV | 2013-01-02 | 2023-11-08 | 2733 | 2013 | 2023 |
| 4 | ABNB | 2020-12-10 | 2023-11-08 | 733 | 2020 | 2023 |
# Restrict to 2023 rows for the qualifying symbols.
df_2023 = df_all[(df_all.date.dt.year==2023) & (df_all.symbol.isin(keep_stocks))]
# Total 2023 volume per stock, largest first.
volume_2023 = (df_2023.groupby(['symbol', 'security', 'gics sector'])['volume']
               .sum()
               .reset_index()
               .sort_values(by='volume', ascending=False)
               .reset_index(drop=True))
volume_2023.head()
| symbol | security | gics sector | volume | |
|---|---|---|---|---|
| 0 | TSLA | Tesla, Inc. | Consumer Discretionary | 3.009291e+10 |
| 1 | AMD | AMD | Information Technology | 1.342035e+10 |
| 2 | AMZN | Amazon | Consumer Discretionary | 1.305160e+10 |
| 3 | AAPL | Apple Inc. | Information Technology | 1.303964e+10 |
| 4 | F | Ford Motor Company | Consumer Discretionary | 1.278319e+10 |
# Total 2023 volume per GICS sector, largest first.
sector_2023 = (df_2023.groupby(['gics sector'])['volume']
               .sum()
               .reset_index()
               .sort_values(by='volume', ascending=False)
               .reset_index(drop=True))
sector_2023
| gics sector | volume | |
|---|---|---|
| 0 | Consumer Discretionary | 9.171407e+10 |
| 1 | Information Technology | 8.888840e+10 |
| 2 | Financials | 6.728113e+10 |
| 3 | Communication Services | 5.267892e+10 |
| 4 | Health Care | 3.755560e+10 |
| 5 | Industrials | 3.672492e+10 |
| 6 | Energy | 3.245171e+10 |
| 7 | Consumer Staples | 2.824873e+10 |
| 8 | Utilities | 2.214882e+10 |
| 9 | Materials | 1.432867e+10 |
| 10 | Real Estate | 1.318748e+10 |
# filter top 5 sectors with highest volume in 2023
sector_list = sector_2023[:5]['gics sector'].tolist()
num_stocks = 5
# Top `num_stocks` highest-volume symbols within each of those sectors, as a
# single flat list. The previous append-then-flatten pattern built a nested
# list and flattened it in a second pass; one comprehension does both.
stock_list = [sym
              for sec in sector_list
              for sym in volume_2023[volume_2023['gics sector'] == sec]['symbol'][:num_stocks].tolist()]
len(stock_list)
25
# Keep only the rows for the selected high-volume stocks.
df_stocks = df_all.loc[df_all['symbol'].isin(stock_list)].reset_index(drop=True)
df_stocks.head()
| date | open | high | low | close | adj close | volume | symbol | security | gics sector | gics sub-industry | headquarters location | date added | cik | founded | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2013-01-02 | 18.003504 | 18.193193 | 17.931683 | 18.099348 | 18.099348 | 101550348.0 | GOOGL | Alphabet Inc. (Class A) | Communication Services | Interactive Media & Services | Mountain View, California | 2014-04-03 | 1652044 | 1998 |
| 1 | 2013-01-03 | 18.141392 | 18.316566 | 18.036036 | 18.109859 | 18.109859 | 92635272.0 | GOOGL | Alphabet Inc. (Class A) | Communication Services | Interactive Media & Services | Mountain View, California | 2014-04-03 | 1652044 | 1998 |
| 2 | 2013-01-04 | 18.251753 | 18.555305 | 18.210211 | 18.467718 | 18.467718 | 110429460.0 | GOOGL | Alphabet Inc. (Class A) | Communication Services | Interactive Media & Services | Mountain View, California | 2014-04-03 | 1652044 | 1998 |
| 3 | 2013-01-07 | 18.404655 | 18.503002 | 18.282784 | 18.387136 | 18.387136 | 66161772.0 | GOOGL | Alphabet Inc. (Class A) | Communication Services | Interactive Media & Services | Mountain View, California | 2014-04-03 | 1652044 | 1998 |
| 4 | 2013-01-08 | 18.406906 | 18.425926 | 18.128880 | 18.350851 | 18.350851 | 66976956.0 | GOOGL | Alphabet Inc. (Class A) | Communication Services | Interactive Media & Services | Mountain View, California | 2014-04-03 | 1652044 | 1998 |
# Train one Ridge model per selected stock and collect its test metrics.
stock_compare = []
# Identifier / raw-price columns that are not engineered features; this list
# is loop-invariant, so it is hoisted out of the loop.
drop_cols1 = ['date', 'open', 'high', 'low', 'close', 'adj close', 'volume',
              'symbol', 'security', 'gics sector', 'gics sub-industry',
              'headquarters location', 'date added', 'cik', 'founded']
for stock in stock_list:
    stock_data = df_stocks[df_stocks['symbol'] == stock]
    stock_data = preprocess_data(stock_data)
    stock_data = stock_data.dropna().reset_index(drop=True)
    # Time-based split: train on pre-2020 data, evaluate on 2020 onwards.
    train_df_temp = stock_data[stock_data.date.dt.year < 2020]
    test_df_temp = stock_data[stock_data.date.dt.year >= 2020]
    # BUG FIX: the original used `.drop(drop_cols1, 1)`; the positional
    # `axis` argument was deprecated in pandas 1.1 and removed in pandas 2.0
    # (it now raises TypeError). Use the explicit `columns=` keyword.
    train_df_temp = train_df_temp.drop(columns=drop_cols1)
    test_df_temp = test_df_temp.drop(columns=drop_cols1)
    # Target column is the next day's close price.
    y_train_temp = train_df_temp['close_1d_next'].copy()
    X_train_temp = train_df_temp.drop(columns=['close_1d_next'])
    y_test_temp = test_df_temp['close_1d_next'].copy()
    X_test_temp = test_df_temp.drop(columns=['close_1d_next'])
    temp_model, temp_pred, temp_score = train_ridge_regression(
        X_train_temp, X_test_temp, y_train_temp, y_test_temp)
    # temp_score is a metric-name -> value mapping; build a one-row frame
    # directly instead of the two-row frame + iloc[0]-as-header shuffle.
    score_df = pd.DataFrame([temp_score])
    score_df['symbol'] = stock
    stock_compare.append(score_df)
# Rank all stocks by test-set R^2, best first.
compare_df = pd.concat(stock_compare).sort_values(by='R2', ascending=False).reset_index(drop=True)
compare_df
| MSE | RMSE | MAE | R2 | symbol | |
|---|---|---|---|---|---|
| 0 | 56.919689 | 7.544514 | 5.112978 | 0.995061 | NVDA |
| 1 | 0.559182 | 0.747785 | 0.529467 | 0.993251 | VZ |
| 2 | 7.770865 | 2.787627 | 2.107652 | 0.992581 | AAPL |
| 3 | 5.039335 | 2.244846 | 1.670374 | 0.992108 | GOOG |
| 4 | 4.98575 | 2.232879 | 1.654225 | 0.992002 | GOOGL |
| 5 | 46.381058 | 6.810364 | 4.690443 | 0.990596 | META |
| 6 | 2.057386 | 1.434359 | 1.020137 | 0.990386 | CVS |
| 7 | 0.147953 | 0.384647 | 0.271314 | 0.990362 | F |
| 8 | 1.27562 | 1.129434 | 0.839937 | 0.98989 | GM |
| 9 | 28.055837 | 5.296776 | 3.992424 | 0.989314 | MSFT |
| 10 | 84.835254 | 9.210606 | 6.497099 | 0.988764 | TSLA |
| 11 | 0.575533 | 0.758639 | 0.55086 | 0.988709 | PFE |
| 12 | 0.581909 | 0.76283 | 0.577919 | 0.988411 | BAC |
| 13 | 0.26222 | 0.512075 | 0.377068 | 0.988041 | KEY |
| 14 | 1.80572 | 1.343771 | 0.958788 | 0.987257 | INTC |
| 15 | 0.991949 | 0.995966 | 0.734726 | 0.987031 | WFC |
| 16 | 11.147927 | 3.338851 | 2.469883 | 0.985621 | AMZN |
| 17 | 0.174755 | 0.418037 | 0.281833 | 0.983153 | T |
| 18 | 10.145635 | 3.185221 | 2.328928 | 0.982675 | AMD |
| 19 | 2.409729 | 1.55233 | 1.141577 | 0.981822 | C |
| 20 | 0.124353 | 0.352637 | 0.259468 | 0.979688 | HBAN |
| 21 | 0.870412 | 0.932958 | 0.684029 | 0.978487 | BMY |
| 22 | 2.087087 | 1.444675 | 1.154954 | 0.96863 | CCL |
| 23 | 4.398395 | 2.097235 | 1.52279 | 0.967914 | JNJ |
| 24 | 4.070663 | 2.017589 | 1.528408 | 0.589411 | VTRS |
The final phase of the project involved applying the developed model to real-world scenarios. By identifying the top 5 GICS sectors with the highest trading volume in 2023, we ensured that our predictions were grounded in current market dynamics. The subsequent selection of the 5 highest-volume stocks within each of those sectors added a layer of practicality to our findings.
The model's strong performance on NVDA, AAPL, VZ, GOOG, and GOOGL demonstrated its robustness across diverse market conditions. At the same time, the noticeably weaker result for VTRS highlighted an opportunity for further investigation into the factors contributing to its underperformance.